From c149c916ec98e9d6a89a9d69a8d0ddb30b9cf6c9 Mon Sep 17 00:00:00 2001 From: zvonand Date: Wed, 15 Jun 2022 11:49:55 +0500 Subject: [PATCH 001/101] initial setup --- .gitmodules | 3 + contrib/CMakeLists.txt | 1 + contrib/base58 | 1 + contrib/base58-cmake/CMakeLists.txt | 22 +++ src/Functions/CMakeLists.txt | 4 + src/Functions/FunctionBase58Conversion.h | 179 ++++++++++++++++++++++ src/Functions/FunctionsBase58.cpp | 23 +++ src/Functions/config_functions.h.in | 1 + src/Functions/configure_config.cmake | 3 + src/Functions/registerFunctionsString.cpp | 12 ++ src/configure_config.cmake | 3 + 11 files changed, 252 insertions(+) create mode 160000 contrib/base58 create mode 100644 contrib/base58-cmake/CMakeLists.txt create mode 100644 src/Functions/FunctionBase58Conversion.h create mode 100644 src/Functions/FunctionsBase58.cpp diff --git a/.gitmodules b/.gitmodules index 55fd684fddb..568dab1eb26 100644 --- a/.gitmodules +++ b/.gitmodules @@ -268,3 +268,6 @@ [submodule "contrib/hashidsxx"] path = contrib/hashidsxx url = https://github.com/schoentoon/hashidsxx.git +[submodule "contrib/base58"] + path = contrib/base58 + url = https://github.com/Kronuz/base-x.git diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index 943e0e0ebc1..a356ade7eb8 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -153,6 +153,7 @@ endif() add_contrib (sqlite-cmake sqlite-amalgamation) add_contrib (s2geometry-cmake s2geometry) +add_contrib (base58-cmake base58) # Put all targets defined here and in subdirectories under "contrib/" folders in GUI-based IDEs. 
# Some of third-party projects may override CMAKE_FOLDER or FOLDER property of their targets, so they would not appear diff --git a/contrib/base58 b/contrib/base58 new file mode 160000 index 00000000000..a85f98fb4ed --- /dev/null +++ b/contrib/base58 @@ -0,0 +1 @@ +Subproject commit a85f98fb4ed52c2f4029a4b6ac1ef0bafdfc56f5 diff --git a/contrib/base58-cmake/CMakeLists.txt b/contrib/base58-cmake/CMakeLists.txt new file mode 100644 index 00000000000..26783e0177d --- /dev/null +++ b/contrib/base58-cmake/CMakeLists.txt @@ -0,0 +1,22 @@ +set(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/base58") + +set (SRCS + ${LIBRARY_DIR}/base_x.hh + ${LIBRARY_DIR}/uinteger_t.hh + ) + +add_library(_base58 ${SRCS}) + +target_include_directories(_base58 SYSTEM BEFORE PUBLIC ${LIBRARY_DIR}) + +if (XCODE OR XCODE_VERSION) + # https://gitlab.kitware.com/cmake/cmake/issues/17457 + # Some native build systems may not like targets that have only object files, so consider adding at least one real source file + # This applies to Xcode. 
+ if (NOT EXISTS "${CMAKE_CURRENT_BINARY_DIR}/dummy.c") + file(WRITE "${CMAKE_CURRENT_BINARY_DIR}/dummy.c" "") + endif () + target_sources(_base58 PRIVATE "${CMAKE_CURRENT_BINARY_DIR}/dummy.c") +endif () + +add_library(ch_contrib::base58 ALIAS _base58) \ No newline at end of file diff --git a/src/Functions/CMakeLists.txt b/src/Functions/CMakeLists.txt index 60386908f01..bf72795aae0 100644 --- a/src/Functions/CMakeLists.txt +++ b/src/Functions/CMakeLists.txt @@ -72,6 +72,10 @@ if (TARGET ch_contrib::llvm) target_link_libraries(clickhouse_functions PRIVATE ch_contrib::llvm) endif () +if (TARGET ch_contrib::base58) + target_link_libraries(clickhouse_functions PRIVATE ch_contrib::base58) +endif() + if (TARGET ch_contrib::base64) target_link_libraries(clickhouse_functions PRIVATE ch_contrib::base64) endif() diff --git a/src/Functions/FunctionBase58Conversion.h b/src/Functions/FunctionBase58Conversion.h new file mode 100644 index 00000000000..fd1bc81842f --- /dev/null +++ b/src/Functions/FunctionBase58Conversion.h @@ -0,0 +1,179 @@ +#pragma once +#include "config_functions.h" + +#if USE_BASE58 +# include +# include +# include +# include +# include +# include +# include +# include +# include + + +namespace DB +{ +using namespace GatherUtils; + +namespace ErrorCodes +{ + extern const int ILLEGAL_COLUMN; + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int INCORRECT_DATA; + extern const int BAD_ARGUMENTS; +} + +struct Base58Encode +{ + static constexpr auto name = "base58Encode"; + static size_t getBufferSize(size_t string_length, size_t string_count) + { + return ((string_length - string_count) / 3 + string_count) * 4 + string_count; + } + + void process(ColumnString source, ColumnString result, std::string alphabet) + { + + } +}; + +struct Base58Decode +{ + static constexpr auto name = "base58Decode"; + + static size_t getBufferSize(size_t string_length, size_t string_count) + { + return ((string_length - string_count) / 4 + string_count) * 3 + string_count; + 
} +}; + +struct TryBase58Decode +{ + static constexpr auto name = "tryBase58Decode"; + + static size_t getBufferSize(size_t string_length, size_t string_count) + { + return Base58Decode::getBufferSize(string_length, string_count); + } +}; + +template +class FunctionBase58Conversion : public IFunction +{ +public: + static constexpr auto name = Func::name; + + static FunctionPtr create(ContextPtr) + { + return std::make_shared(); + } + + String getName() const override + { + return Func::name; + } + + bool isVariadic() const override { return true; } + + size_t getNumberOfArguments() const override { return 0; } + + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } + + bool useDefaultImplementationForConstants() const override { return true; } + + ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; } + + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override + { + if (arguments.size() != 1 || arguments.size() != 2) + throw Exception( + "Wrong number of arguments for function " + getName() + ": " + arguments.size() + " provided, 1 or 2 expected.", + ErrorCodes::BAD_ARGUMENTS); + + if (!isString(arguments[0].type)) + throw Exception( + "Illegal type " + arguments[0].type->getName() + " of 1st argument of function " + getName() + ". Must be String.", + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + if (!isString(arguments[1].type)) + throw Exception( + "Illegal type " + arguments[1].type->getName() + " of 2nd argument of function " + getName() + ". 
Must be String.", + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + return std::make_shared(); + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + { + const ColumnPtr column_string = arguments[0].column; + const ColumnString * input = checkAndGetColumn(column_string.get()); + if (!input) + throw Exception( + "Illegal column " + arguments[0].column->getName() + " of first argument of function " + getName(), + ErrorCodes::ILLEGAL_COLUMN); + + std::string alphabet = "bitcoin"; + + if (arguments.size() == 2) + { + const auto * alphabet_column = checkAndGetColumn(arguments[1].column.get()); + + if (!alphabet_column) + throw Exception("Second argument for function " + getName() + " must be constant String", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + if (alphabet = alphabet_column->getValue(); alphabet != "bitcoin" && alphabet != "ripple" && alphabet != "flickr" && alphabet != "gmp") + throw Exception("Second argument for function " + getName() + " must be 'bitcoin', 'ripple', 'flickr' or 'gmp'", ErrorCodes::ILLEGAL_COLUMN); + + } + + auto dst_column = ColumnString::create(); + auto & dst_data = dst_column->getChars(); + auto & dst_offsets = dst_column->getOffsets(); + + size_t reserve = Func::getBufferSize(input->getChars().size(), input->size()); + dst_data.resize(reserve); + dst_offsets.resize(input_rows_count); + + const ColumnString::Offsets & src_offsets = input->getOffsets(); + + const auto * source = input->getChars().data(); + auto * dst = dst_data.data(); + auto * dst_pos = dst; + + size_t src_offset_prev = 0; + + for (size_t row = 0; row < input_rows_count; ++row) + { + size_t srclen = src_offsets[row] - src_offset_prev - 1; + size_t outlen = 0; + + if constexpr (std::is_same_v) + { + Base58:: + } + else if constexpr (std::is_same_v) + { + } + else + { + } + + source += srclen + 1; + dst_pos += outlen; + *dst_pos = '\0'; + dst_pos += 1; + + dst_offsets[row] = dst_pos - dst; + 
src_offset_prev = src_offsets[row]; + } + + dst_data.resize(dst_pos - dst); + + return dst_column; + } +}; +} + +#endif diff --git a/src/Functions/FunctionsBase58.cpp b/src/Functions/FunctionsBase58.cpp new file mode 100644 index 00000000000..efd3f42c6da --- /dev/null +++ b/src/Functions/FunctionsBase58.cpp @@ -0,0 +1,23 @@ +#include +#if USE_BASE58 +#include +#include + +namespace DB +{ +void registerFunctionBase58Encode(FunctionFactory & factory) +{ + factory.registerFunction>(); +} + +void registerFunctionBase58Decode(FunctionFactory & factory) +{ + factory.registerFunction>(); +} + +void registerFunctionTryBase58Decode(FunctionFactory & factory) +{ + factory.registerFunction>(); +} +} +#endif diff --git a/src/Functions/config_functions.h.in b/src/Functions/config_functions.h.in index a693611f975..001712d5cef 100644 --- a/src/Functions/config_functions.h.in +++ b/src/Functions/config_functions.h.in @@ -2,6 +2,7 @@ // .h autogenerated by cmake! +#cmakedefine01 USE_BASE58 #cmakedefine01 USE_BASE64 #cmakedefine01 USE_SIMDJSON #cmakedefine01 USE_RAPIDJSON diff --git a/src/Functions/configure_config.cmake b/src/Functions/configure_config.cmake index 7615a2eeeaf..776996d7e17 100644 --- a/src/Functions/configure_config.cmake +++ b/src/Functions/configure_config.cmake @@ -1,6 +1,9 @@ if (TARGET ch_contrib::fastops) set(USE_FASTOPS 1) endif() +if (TARGET ch_contrib::base58) + set(USE_BASE58 1) +endif() if (TARGET ch_contrib::base64) set(USE_BASE64 1) endif() diff --git a/src/Functions/registerFunctionsString.cpp b/src/Functions/registerFunctionsString.cpp index f86043c6959..2e2975a459c 100644 --- a/src/Functions/registerFunctionsString.cpp +++ b/src/Functions/registerFunctionsString.cpp @@ -49,6 +49,12 @@ void registerFunctionBase64Decode(FunctionFactory &); void registerFunctionTryBase64Decode(FunctionFactory &); #endif +#if USE_BASE58 +void registerFunctionBase58Encode(FunctionFactory &); +void registerFunctionBase58Decode(FunctionFactory &); +void 
registerFunctionTryBase58Decode(FunctionFactory &); +#endif + #if USE_NLP void registerFunctionStem(FunctionFactory &); void registerFunctionSynonyms(FunctionFactory &); @@ -105,6 +111,12 @@ void registerFunctionsString(FunctionFactory & factory) registerFunctionTryBase64Decode(factory); #endif +#if USE_BASE58 + registerFunctionBase58Encode(factory); + registerFunctionBase58Decode(factory); + registerFunctionTryBase58Decode(factory); +#endif + #if USE_NLP registerFunctionStem(factory); registerFunctionSynonyms(factory); diff --git a/src/configure_config.cmake b/src/configure_config.cmake index 519307ba28a..fc2a858e75a 100644 --- a/src/configure_config.cmake +++ b/src/configure_config.cmake @@ -55,6 +55,9 @@ endif() if (TARGET ch_contrib::base64) set(USE_BASE64 1) endif() +if (TARGET ch_contrib::base58) + set(USE_BASE58 1) +endif() if (TARGET ch_contrib::yaml_cpp) set(USE_YAML_CPP 1) endif() From a800158438d939555b8f8cb472c9d3ffc51523e5 Mon Sep 17 00:00:00 2001 From: zvonand Date: Thu, 16 Jun 2022 15:11:41 +0500 Subject: [PATCH 002/101] wip upload --- src/Functions/FunctionBase58Conversion.h | 102 ++++++++++++----------- 1 file changed, 53 insertions(+), 49 deletions(-) diff --git a/src/Functions/FunctionBase58Conversion.h b/src/Functions/FunctionBase58Conversion.h index fd1bc81842f..9aacbc4c9e7 100644 --- a/src/Functions/FunctionBase58Conversion.h +++ b/src/Functions/FunctionBase58Conversion.h @@ -8,14 +8,12 @@ # include # include # include -# include # include # include namespace DB { -using namespace GatherUtils; namespace ErrorCodes { @@ -28,14 +26,58 @@ namespace ErrorCodes struct Base58Encode { static constexpr auto name = "base58Encode"; - static size_t getBufferSize(size_t string_length, size_t string_count) - { - return ((string_length - string_count) / 3 + string_count) * 4 + string_count; - } - void process(ColumnString source, ColumnString result, std::string alphabet) + static void process(const ColumnString * input, ColumnString * dst_column, 
std::string& alphabet, size_t input_rows_count) { + auto & dst_data = dst_column->getChars(); + auto & dst_offsets = dst_column->getOffsets(); + size_t current_allocated_size = input->getChars().size(); + + dst_data.resize(current_allocated_size); + dst_offsets.resize(input_rows_count); + + const ColumnString::Offsets & src_offsets = input->getOffsets(); + + const auto * source = input->getChars().raw_data(); + auto * dst = dst_data.data(); + auto * dst_pos = dst; + + size_t src_offset_prev = 0; + size_t processed_size = 0; + + const auto& encoder = (alphabet == "bitcoin") ? Base58::bitcoin() : + ((alphabet == "flickr") ? Base58::flickr() : + ((alphabet == "ripple") ? Base58::ripple() : Base58::base58())); + + for (size_t row = 0; row < input_rows_count; ++row) + { + size_t srclen = src_offsets[row] - src_offset_prev - 1; + /// Why we didn't simply operate on char* here? + /// We don't know the size of the result string beforehand (it's not byte-to-byte encoding), + /// so we may need to do many resizes (the worst case -- we'll do it for each row) + /// Using std::string allows to do exponential resizes and one final resize after whole operation is complete + std::string encoded; + encoder.encode(encoded, source, srclen); + size_t outlen = encoded.size(); + + if (processed_size + outlen >= current_allocated_size) + { + current_allocated_size += current_allocated_size; + dst_data.resize(current_allocated_size); + } + + source += srclen + 1; + dst_pos += outlen; + *dst_pos = '\0'; + dst_pos += 1; + + dst_offsets[row] = dst_pos - dst; + src_offset_prev = src_offsets[row]; + processed_size += outlen; + } + + dst_data.resize(dst_pos - dst); } }; @@ -123,53 +165,15 @@ public: if (!alphabet_column) throw Exception("Second argument for function " + getName() + " must be constant String", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - if (alphabet = alphabet_column->getValue(); alphabet != "bitcoin" && alphabet != "ripple" && alphabet != "flickr" && alphabet != "gmp") - throw 
Exception("Second argument for function " + getName() + " must be 'bitcoin', 'ripple', 'flickr' or 'gmp'", ErrorCodes::ILLEGAL_COLUMN); + if (alphabet = alphabet_column->getValue(); + alphabet != "bitcoin" && alphabet != "ripple" && alphabet != "flickr" && alphabet != "gmp") + throw Exception("Second argument for function " + getName() + " must be 'bitcoin', 'ripple', 'gmp' or 'flickr'", ErrorCodes::ILLEGAL_COLUMN); } auto dst_column = ColumnString::create(); - auto & dst_data = dst_column->getChars(); - auto & dst_offsets = dst_column->getOffsets(); - size_t reserve = Func::getBufferSize(input->getChars().size(), input->size()); - dst_data.resize(reserve); - dst_offsets.resize(input_rows_count); - - const ColumnString::Offsets & src_offsets = input->getOffsets(); - - const auto * source = input->getChars().data(); - auto * dst = dst_data.data(); - auto * dst_pos = dst; - - size_t src_offset_prev = 0; - - for (size_t row = 0; row < input_rows_count; ++row) - { - size_t srclen = src_offsets[row] - src_offset_prev - 1; - size_t outlen = 0; - - if constexpr (std::is_same_v) - { - Base58:: - } - else if constexpr (std::is_same_v) - { - } - else - { - } - - source += srclen + 1; - dst_pos += outlen; - *dst_pos = '\0'; - dst_pos += 1; - - dst_offsets[row] = dst_pos - dst; - src_offset_prev = src_offsets[row]; - } - - dst_data.resize(dst_pos - dst); + Func::process(column_string, dst_column, alphabet, input_rows_count); return dst_column; } From c1b2b669ab8a8750be5c79a622dc863f0fdffb12 Mon Sep 17 00:00:00 2001 From: zvonand Date: Fri, 17 Jun 2022 01:52:45 +0500 Subject: [PATCH 003/101] remove wrong code --- src/Functions/FunctionBase58Conversion.h | 78 +++++++++++++++++------ src/Functions/FunctionsBase58.cpp | 5 -- src/Functions/registerFunctionsString.cpp | 2 - 3 files changed, 58 insertions(+), 27 deletions(-) diff --git a/src/Functions/FunctionBase58Conversion.h b/src/Functions/FunctionBase58Conversion.h index 9aacbc4c9e7..97dc2a4f40c 100644 --- 
a/src/Functions/FunctionBase58Conversion.h +++ b/src/Functions/FunctionBase58Conversion.h @@ -27,7 +27,7 @@ struct Base58Encode { static constexpr auto name = "base58Encode"; - static void process(const ColumnString * input, ColumnString * dst_column, std::string& alphabet, size_t input_rows_count) + static void process(const ColumnString * input, ColumnString::MutablePtr& dst_column, std::string& alphabet, size_t input_rows_count) { auto & dst_data = dst_column->getChars(); auto & dst_offsets = dst_column->getOffsets(); @@ -48,15 +48,16 @@ struct Base58Encode const auto& encoder = (alphabet == "bitcoin") ? Base58::bitcoin() : ((alphabet == "flickr") ? Base58::flickr() : - ((alphabet == "ripple") ? Base58::ripple() : Base58::base58())); + ((alphabet == "ripple") ? Base58::ripple() : + Base58::base58())); for (size_t row = 0; row < input_rows_count; ++row) { size_t srclen = src_offsets[row] - src_offset_prev - 1; - /// Why we didn't simply operate on char* here? + /// Why we didn't use char* here? 
/// We don't know the size of the result string beforehand (it's not byte-to-byte encoding), /// so we may need to do many resizes (the worst case -- we'll do it for each row) - /// Using std::string allows to do exponential resizes and one final resize after whole operation is complete + /// This way we do exponential resizes and one final resize after whole operation is complete std::string encoded; encoder.encode(encoded, source, srclen); size_t outlen = encoded.size(); @@ -66,11 +67,10 @@ struct Base58Encode current_allocated_size += current_allocated_size; dst_data.resize(current_allocated_size); } + std::strcpy(reinterpret_cast(dst_pos), encoded.c_str()); source += srclen + 1; - dst_pos += outlen; - *dst_pos = '\0'; - dst_pos += 1; + dst_pos += outlen + 1; dst_offsets[row] = dst_pos - dst; src_offset_prev = src_offsets[row]; @@ -85,19 +85,57 @@ struct Base58Decode { static constexpr auto name = "base58Decode"; - static size_t getBufferSize(size_t string_length, size_t string_count) + static void process(const ColumnString * input, ColumnString::MutablePtr& dst_column, std::string& alphabet, size_t input_rows_count) { - return ((string_length - string_count) / 4 + string_count) * 3 + string_count; - } -}; + auto & dst_data = dst_column->getChars(); + auto & dst_offsets = dst_column->getOffsets(); -struct TryBase58Decode -{ - static constexpr auto name = "tryBase58Decode"; + size_t current_allocated_size = input->getChars().size(); - static size_t getBufferSize(size_t string_length, size_t string_count) - { - return Base58Decode::getBufferSize(string_length, string_count); + dst_data.resize(current_allocated_size); + dst_offsets.resize(input_rows_count); + + const ColumnString::Offsets & src_offsets = input->getOffsets(); + + const auto * source = input->getChars().raw_data(); + auto * dst = dst_data.data(); + auto * dst_pos = dst; + + size_t src_offset_prev = 0; + size_t processed_size = 0; + + const auto& decoder = (alphabet == "bitcoin") ? 
Base58::bitcoin() : + ((alphabet == "flickr") ? Base58::flickr() : + ((alphabet == "ripple") ? Base58::ripple() : + Base58::base58())); + + for (size_t row = 0; row < input_rows_count; ++row) + { + size_t srclen = src_offsets[row] - src_offset_prev - 1; + /// Why we didn't use char* here? + /// We don't know the size of the result string beforehand (it's not byte-to-byte encoding), + /// so we may need to do many resizes (the worst case -- we'll do it for each row) + /// This way we do exponential resizes and one final resize after whole operation is complete + std::string decoded; + decoder.decode(decoded, source, srclen); + size_t outlen = decoded.size(); + + if (processed_size + outlen >= current_allocated_size) + { + current_allocated_size += current_allocated_size; + dst_data.resize(current_allocated_size); + } + std::strcpy(reinterpret_cast(dst_pos), decoded.c_str()); + + source += srclen + 1; + dst_pos += outlen + 1; + + dst_offsets[row] = dst_pos - dst; + src_offset_prev = src_offsets[row]; + processed_size += outlen; + } + + dst_data.resize(dst_pos - dst); } }; @@ -129,9 +167,9 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { - if (arguments.size() != 1 || arguments.size() != 2) + if (arguments.size() != 1 && arguments.size() != 2) throw Exception( - "Wrong number of arguments for function " + getName() + ": " + arguments.size() + " provided, 1 or 2 expected.", + "Wrong number of arguments for function " + getName() + ": 1 or 2 expected.", ErrorCodes::BAD_ARGUMENTS); if (!isString(arguments[0].type)) @@ -173,7 +211,7 @@ public: auto dst_column = ColumnString::create(); - Func::process(column_string, dst_column, alphabet, input_rows_count); + Func::process(input, dst_column, alphabet, input_rows_count); return dst_column; } diff --git a/src/Functions/FunctionsBase58.cpp b/src/Functions/FunctionsBase58.cpp index efd3f42c6da..3ccb4d790ce 100644 --- a/src/Functions/FunctionsBase58.cpp +++ 
b/src/Functions/FunctionsBase58.cpp @@ -14,10 +14,5 @@ void registerFunctionBase58Decode(FunctionFactory & factory) { factory.registerFunction>(); } - -void registerFunctionTryBase58Decode(FunctionFactory & factory) -{ - factory.registerFunction>(); -} } #endif diff --git a/src/Functions/registerFunctionsString.cpp b/src/Functions/registerFunctionsString.cpp index 2e2975a459c..43035ef51e7 100644 --- a/src/Functions/registerFunctionsString.cpp +++ b/src/Functions/registerFunctionsString.cpp @@ -52,7 +52,6 @@ void registerFunctionTryBase64Decode(FunctionFactory &); #if USE_BASE58 void registerFunctionBase58Encode(FunctionFactory &); void registerFunctionBase58Decode(FunctionFactory &); -void registerFunctionTryBase58Decode(FunctionFactory &); #endif #if USE_NLP @@ -114,7 +113,6 @@ void registerFunctionsString(FunctionFactory & factory) #if USE_BASE58 registerFunctionBase58Encode(factory); registerFunctionBase58Decode(factory); - registerFunctionTryBase58Decode(factory); #endif #if USE_NLP From f987f461e5da22921c753f5e5261c2cecf9f49ea Mon Sep 17 00:00:00 2001 From: Andrey Zvonov Date: Fri, 17 Jun 2022 15:00:32 +0500 Subject: [PATCH 004/101] fix style -- rm unused ErrorCode --- src/Functions/FunctionBase58Conversion.h | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Functions/FunctionBase58Conversion.h b/src/Functions/FunctionBase58Conversion.h index 97dc2a4f40c..87a1821b2de 100644 --- a/src/Functions/FunctionBase58Conversion.h +++ b/src/Functions/FunctionBase58Conversion.h @@ -19,7 +19,6 @@ namespace ErrorCodes { extern const int ILLEGAL_COLUMN; extern const int ILLEGAL_TYPE_OF_ARGUMENT; - extern const int INCORRECT_DATA; extern const int BAD_ARGUMENTS; } From 23f48a9fb9f9e7bfa0ff200d84d3530f7b2723ed Mon Sep 17 00:00:00 2001 From: avogar Date: Fri, 17 Jun 2022 11:44:49 +0000 Subject: [PATCH 005/101] Fix bug with nested short-circuit functions --- src/Columns/ColumnFunction.cpp | 7 +++++-- src/Functions/FunctionsLogical.cpp | 2 ++ src/Functions/if.cpp | 2 ++ 
src/Functions/multiIf.cpp | 2 ++ src/Functions/toColumnTypeName.cpp | 8 -------- src/Functions/toTypeName.cpp | 7 ------- ...1_nested_short_circuit_functions.reference | 20 +++++++++++++++++++ .../02321_nested_short_circuit_functions.sql | 3 +++ 8 files changed, 34 insertions(+), 17 deletions(-) create mode 100644 tests/queries/0_stateless/02321_nested_short_circuit_functions.reference create mode 100644 tests/queries/0_stateless/02321_nested_short_circuit_functions.sql diff --git a/src/Columns/ColumnFunction.cpp b/src/Columns/ColumnFunction.cpp index 6350d7b0e61..2274762a9f5 100644 --- a/src/Columns/ColumnFunction.cpp +++ b/src/Columns/ColumnFunction.cpp @@ -250,9 +250,12 @@ ColumnWithTypeAndName ColumnFunction::reduce() const "arguments but " + toString(captured) + " columns were captured.", ErrorCodes::LOGICAL_ERROR); ColumnsWithTypeAndName columns = captured_columns; - if (is_short_circuit_argument) + IFunction::ShortCircuitSettings settings; + /// Arguments of lazy executed function can also be lazy executed. + /// But we shouldn't execute arguments if this function is short circuit, + /// because it will handle lazy executed arguments by itself. + if (is_short_circuit_argument && !function->isShortCircuit(settings, args)) { - /// Arguments of lazy executed function can also be lazy executed. for (auto & col : columns) { if (const ColumnFunction * arg = checkAndGetShortCircuitArgument(col.column)) diff --git a/src/Functions/FunctionsLogical.cpp b/src/Functions/FunctionsLogical.cpp index be295186943..392cbac8a1b 100644 --- a/src/Functions/FunctionsLogical.cpp +++ b/src/Functions/FunctionsLogical.cpp @@ -545,6 +545,8 @@ ColumnPtr FunctionAnyArityLogical::executeShortCircuit(ColumnsWithTy if (Name::name != NameAnd::name && Name::name != NameOr::name) throw Exception("Function " + getName() + " doesn't support short circuit execution", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + executeColumnIfNeeded(arguments[0]); + /// Let's denote x_i' = maskedExecute(x_i, mask). 
/// 1) AND(x_0, x_1, x_2, ..., x_n) /// We will support mask_i = x_0 & x_1 & ... & x_i. diff --git a/src/Functions/if.cpp b/src/Functions/if.cpp index 95c66c20541..d4c2fcdd779 100644 --- a/src/Functions/if.cpp +++ b/src/Functions/if.cpp @@ -987,6 +987,8 @@ private: if (last_short_circuit_argument_index == -1) return; + executeColumnIfNeeded(arguments[0]); + /// Check if condition is const or null to not create full mask from it. if ((isColumnConst(*arguments[0].column) || arguments[0].column->onlyNull()) && !arguments[0].column->empty()) { diff --git a/src/Functions/multiIf.cpp b/src/Functions/multiIf.cpp index 7ed0ee00954..9e0ca4142e5 100644 --- a/src/Functions/multiIf.cpp +++ b/src/Functions/multiIf.cpp @@ -266,6 +266,8 @@ private: if (last_short_circuit_argument_index < 0) return; + executeColumnIfNeeded(arguments[0]); + /// Let's denote x_i' = maskedExecute(x_i, mask). /// multiIf(x_0, y_0, x_1, y_1, x_2, y_2, ..., x_{n-1}, y_{n-1}, y_n) /// We will support mask_i = !x_0 & !x_1 & ... 
& !x_i diff --git a/src/Functions/toColumnTypeName.cpp b/src/Functions/toColumnTypeName.cpp index 345de2954c4..466a8ba17c5 100644 --- a/src/Functions/toColumnTypeName.cpp +++ b/src/Functions/toColumnTypeName.cpp @@ -28,14 +28,6 @@ public: bool useDefaultImplementationForNothing() const override { return false; } - bool isShortCircuit(ShortCircuitSettings & settings, size_t /*number_of_arguments*/) const override - { - settings.enable_lazy_execution_for_first_argument = true; - settings.enable_lazy_execution_for_common_descendants_of_arguments = true; - settings.force_enable_lazy_execution = true; - return true; - } - bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } size_t getNumberOfArguments() const override diff --git a/src/Functions/toTypeName.cpp b/src/Functions/toTypeName.cpp index f3af49315ed..d9ec08642ca 100644 --- a/src/Functions/toTypeName.cpp +++ b/src/Functions/toTypeName.cpp @@ -32,13 +32,6 @@ public: bool useDefaultImplementationForNothing() const override { return false; } - bool isShortCircuit(ShortCircuitSettings & settings, size_t /*number_of_arguments*/) const override - { - settings.enable_lazy_execution_for_first_argument = false; - settings.enable_lazy_execution_for_common_descendants_of_arguments = true; - settings.force_enable_lazy_execution = true; - return true; - } bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } diff --git a/tests/queries/0_stateless/02321_nested_short_circuit_functions.reference b/tests/queries/0_stateless/02321_nested_short_circuit_functions.reference new file mode 100644 index 00000000000..a942937b975 --- /dev/null +++ b/tests/queries/0_stateless/02321_nested_short_circuit_functions.reference @@ -0,0 +1,20 @@ +1 +1 +0 +0 +0 +1 +1 +0 +0 +0 +0 +1 +1 +1 +1 +0 +0 +0 +0 +0 diff --git a/tests/queries/0_stateless/02321_nested_short_circuit_functions.sql 
b/tests/queries/0_stateless/02321_nested_short_circuit_functions.sql new file mode 100644 index 00000000000..a8ea62d3ac3 --- /dev/null +++ b/tests/queries/0_stateless/02321_nested_short_circuit_functions.sql @@ -0,0 +1,3 @@ +select number >= 0 and if(number != 0, intDiv(1, number), 1) from numbers(5); +select if(number >= 0, if(number != 0, intDiv(1, number), 1), 1) from numbers(5); +select number >= 0 and if(number = 0, 0, if(number == 1, intDiv(1, number), if(number == 2, intDiv(1, number - 1), if(number == 3, intDiv(1, number - 2), intDiv(1, number - 3))))) from numbers(10); From f4b3af091d0a24bb3f1ac8dec26f7f22ad3e8623 Mon Sep 17 00:00:00 2001 From: zvonand Date: Fri, 17 Jun 2022 23:48:14 +0500 Subject: [PATCH 006/101] fix zero byte --- src/Functions/FunctionBase58Conversion.h | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/src/Functions/FunctionBase58Conversion.h b/src/Functions/FunctionBase58Conversion.h index 87a1821b2de..a0431ca47df 100644 --- a/src/Functions/FunctionBase58Conversion.h +++ b/src/Functions/FunctionBase58Conversion.h @@ -58,7 +58,8 @@ struct Base58Encode /// so we may need to do many resizes (the worst case -- we'll do it for each row) /// This way we do exponential resizes and one final resize after whole operation is complete std::string encoded; - encoder.encode(encoded, source, srclen); + if (srclen) + encoder.encode(encoded, source, srclen); size_t outlen = encoded.size(); if (processed_size + outlen >= current_allocated_size) @@ -66,10 +67,13 @@ struct Base58Encode current_allocated_size += current_allocated_size; dst_data.resize(current_allocated_size); } - std::strcpy(reinterpret_cast(dst_pos), encoded.c_str()); + if (srclen) + std::strcpy(reinterpret_cast(dst_pos), encoded.c_str()); source += srclen + 1; - dst_pos += outlen + 1; + dst_pos += outlen; + *dst_pos = '\0'; + dst_pos += 1; dst_offsets[row] = dst_pos - dst; src_offset_prev = src_offsets[row]; @@ -127,7 +131,9 @@ struct Base58Decode 
std::strcpy(reinterpret_cast(dst_pos), decoded.c_str()); source += srclen + 1; - dst_pos += outlen + 1; + dst_pos += outlen; + *dst_pos = '\0'; + dst_pos += 1; dst_offsets[row] = dst_pos - dst; src_offset_prev = src_offsets[row]; @@ -176,7 +182,7 @@ public: "Illegal type " + arguments[0].type->getName() + " of 1st argument of function " + getName() + ". Must be String.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - if (!isString(arguments[1].type)) + if (arguments.size() == 2 && !isString(arguments[1].type)) throw Exception( "Illegal type " + arguments[1].type->getName() + " of 2nd argument of function " + getName() + ". Must be String.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); From 832fd6e0a938c7a50247a6b233b7ee681dccaf14 Mon Sep 17 00:00:00 2001 From: zvonand Date: Sun, 19 Jun 2022 23:10:28 +0500 Subject: [PATCH 007/101] Added tests + minor updates --- .gitmodules | 4 +- contrib/CMakeLists.txt | 2 +- .../CMakeLists.txt | 10 +- contrib/base-x/.gitignore | 4 + contrib/base-x/.travis.yml | 36 + contrib/base-x/LICENSE | 21 + contrib/base-x/README.md | 97 + contrib/base-x/base_x.hh | 614 ++++ contrib/base-x/tests/test.cc | 30 + contrib/base-x/tests/testcases/tests.cc | 359 +++ contrib/base-x/uinteger_t.hh | 2546 +++++++++++++++++ contrib/base58 | 1 - src/Functions/CMakeLists.txt | 4 +- src/Functions/FunctionBase58Conversion.h | 46 +- src/Functions/configure_config.cmake | 2 +- .../0_stateless/02337_base58.reference | 48 + tests/queries/0_stateless/02337_base58.sql | 17 + 17 files changed, 3809 insertions(+), 32 deletions(-) rename contrib/{base58-cmake => base-x-cmake}/CMakeLists.txt (65%) create mode 100644 contrib/base-x/.gitignore create mode 100755 contrib/base-x/.travis.yml create mode 100644 contrib/base-x/LICENSE create mode 100644 contrib/base-x/README.md create mode 100644 contrib/base-x/base_x.hh create mode 100644 contrib/base-x/tests/test.cc create mode 100644 contrib/base-x/tests/testcases/tests.cc create mode 100644 contrib/base-x/uinteger_t.hh delete mode 
160000 contrib/base58 create mode 100644 tests/queries/0_stateless/02337_base58.reference create mode 100644 tests/queries/0_stateless/02337_base58.sql diff --git a/.gitmodules b/.gitmodules index 568dab1eb26..e1960d2144b 100644 --- a/.gitmodules +++ b/.gitmodules @@ -268,6 +268,6 @@ [submodule "contrib/hashidsxx"] path = contrib/hashidsxx url = https://github.com/schoentoon/hashidsxx.git -[submodule "contrib/base58"] - path = contrib/base58 +[submodule "contrib/base-x"] + path = contrib/base-x url = https://github.com/Kronuz/base-x.git diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index a356ade7eb8..2ade6c139f6 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -153,7 +153,7 @@ endif() add_contrib (sqlite-cmake sqlite-amalgamation) add_contrib (s2geometry-cmake s2geometry) -add_contrib (base58-cmake base58) +add_contrib (base-x-cmake base-x) # Put all targets defined here and in subdirectories under "contrib/" folders in GUI-based IDEs. # Some of third-party projects may override CMAKE_FOLDER or FOLDER property of their targets, so they would not appear diff --git a/contrib/base58-cmake/CMakeLists.txt b/contrib/base-x-cmake/CMakeLists.txt similarity index 65% rename from contrib/base58-cmake/CMakeLists.txt rename to contrib/base-x-cmake/CMakeLists.txt index 26783e0177d..48cb54d307f 100644 --- a/contrib/base58-cmake/CMakeLists.txt +++ b/contrib/base-x-cmake/CMakeLists.txt @@ -1,13 +1,13 @@ -set(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/base58") +set(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/base-x") set (SRCS ${LIBRARY_DIR}/base_x.hh ${LIBRARY_DIR}/uinteger_t.hh ) -add_library(_base58 ${SRCS}) +add_library(_base-x ${SRCS}) -target_include_directories(_base58 SYSTEM BEFORE PUBLIC ${LIBRARY_DIR}) +target_include_directories(_base-x SYSTEM BEFORE PUBLIC ${LIBRARY_DIR}) if (XCODE OR XCODE_VERSION) # https://gitlab.kitware.com/cmake/cmake/issues/17457 @@ -16,7 +16,7 @@ if (XCODE OR XCODE_VERSION) if (NOT EXISTS 
"${CMAKE_CURRENT_BINARY_DIR}/dummy.c") file(WRITE "${CMAKE_CURRENT_BINARY_DIR}/dummy.c" "") endif () - target_sources(_base58 PRIVATE "${CMAKE_CURRENT_BINARY_DIR}/dummy.c") + target_sources(_base-x PRIVATE "${CMAKE_CURRENT_BINARY_DIR}/dummy.c") endif () -add_library(ch_contrib::base58 ALIAS _base58) \ No newline at end of file +add_library(ch_contrib::base-x ALIAS _base-x) \ No newline at end of file diff --git a/contrib/base-x/.gitignore b/contrib/base-x/.gitignore new file mode 100644 index 00000000000..b63b40c8b71 --- /dev/null +++ b/contrib/base-x/.gitignore @@ -0,0 +1,4 @@ +.DS_Store +test +*.o +*.dSYM \ No newline at end of file diff --git a/contrib/base-x/.travis.yml b/contrib/base-x/.travis.yml new file mode 100755 index 00000000000..f55132e614f --- /dev/null +++ b/contrib/base-x/.travis.yml @@ -0,0 +1,36 @@ +sudo: false + +language: cpp + +compiler: + - clang + - gcc + +addons: + apt: + sources: + - ubuntu-toolchain-r-test + - llvm-toolchain-precise-3.8 + packages: + - g++-6 + - clang-3.8 + +install: + - if [ "$CXX" = "g++" ]; then export CXX="g++-6"; fi + - if [ "$CXX" == "clang++" ]; then export CXX="clang++-3.8"; fi + - sudo apt-get install -qq git cmake + +before_script: + # not much better than git submodules, but there was never a need/want for the repo in this repo + - cd .. 
+ - git clone https://github.com/google/googletest.git + - cd googletest + - git reset --hard d62d6c6556d96dda924382547c54a4b3afedb22c + - cmake CMakeLists.txt + - make + + - cd ../base-x/tests + - make + +script: + - make run diff --git a/contrib/base-x/LICENSE b/contrib/base-x/LICENSE new file mode 100644 index 00000000000..f7b3408abac --- /dev/null +++ b/contrib/base-x/LICENSE @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) 2017 German Mendez Bravo (Kronuz) @ german dot mb at gmail.com + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
diff --git a/contrib/base-x/README.md b/contrib/base-x/README.md new file mode 100644 index 00000000000..5dc4a068043 --- /dev/null +++ b/contrib/base-x/README.md @@ -0,0 +1,97 @@ +# base-x [![License][license-img]][license-url] [![GitHub Stars][stars-img]][stars-url] [![GitHub Forks][forks-img]][forks-url] [![GitHub Watchers][watchers-img]][watchers-url] [![Tweet][tweet-img]][tweet-url] + +[![Build Status](https://travis-ci.org/Kronuz/base-x.svg?branch=master)](https://travis-ci.org/Kronuz/base-x) + + +### BaseX encoder / decoder for C++ + +This is a fast base encoder / decoder of any given alphabet. + + +#### Example + +``` cpp +// example.cc +// g++ -std=c++14 -o example example.cc + +#include +#include "base_x.hh" + +int main() { + auto encoded = Base58::base58().encode("Hello world!"); + + std::cout << encoded << std::endl; + // => 1LDlk6QWOejX6rPrJ + + return 0; +} +``` + + +#### Compilation + +* g++ and clang++ are supported. +* C++14 is required. + + +### Alphabets + +See below for a list of commonly recognized alphabets, and their respective base. 
+ +Base | Factory | Alphabet +-----|---------------------|------------- + 2 | base2::base2() | `01` + 2 | base8::base8() | `01234567` + 11 | bas11::bas11() | `0123456789a` + 16 | base16::base16() | `0123456789abcdef` + 32 | base32::base32() | `0123456789ABCDEFGHJKMNPQRSTVWXYZ` + 36 | base36::base36() | `0123456789abcdefghijklmnopqrstuvwxyz` + 58 | base58::base58() | `123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz` + 58 | base58::bitcoin() | `123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz` + 58 | base58::gmp() | `0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuv` + 58 | base58::ripple() | `rpshnaf39wBUDNEGHJKLM4PQRST7VWXYZ2bcdeCg65jkm8oFqi1tuvAxyz` + 58 | base58::flickr() | `123456789abcdefghijkmnopqrstuvwxyzABCDEFGHJKLMNPQRSTUVWXYZ` + 62 | base62::base62() | `0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz` + 62 | base62::inverted() | `0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ` + 64 | base64::base64() | `ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/` + 64 | base64::urlsafe() | `ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_` + 66 | base66::base66() | `ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_.!~` + + +### How it works + +It encodes octet arrays by doing long divisions on all significant digits in the +array, creating a representation of that number in the new base. + +**If you need standard hex encoding, or base64 encoding, this module is NOT +appropriate.** + + +## Author +[**German Mendez Bravo (Kronuz)**](https://kronuz.io/) + +[![Follow on GitHub][github-follow-img]][github-follow-url] +[![Follow on Twitter][twitter-follow-img]][twitter-follow-url] + + +## License + +MIT License. See [LICENSE](LICENSE) for details. 
+ +Copyright (c) 2017 German Mendez Bravo (Kronuz) @ german dot mb at gmail.com + + +[license-url]: https://github.com/Kronuz/base-x/blob/master/LICENSE +[license-img]: https://img.shields.io/github/license/Kronuz/base-x.svg +[stars-url]: https://github.com/Kronuz/base-x/stargazers +[stars-img]: https://img.shields.io/github/stars/Kronuz/base-x.svg?style=social&label=Stars +[forks-url]: https://github.com/Kronuz/base-x/network/members +[forks-img]: https://img.shields.io/github/forks/Kronuz/base-x.svg?style=social&label=Forks +[watchers-url]: https://github.com/Kronuz/base-x/watchers +[watchers-img]: https://img.shields.io/github/watchers/Kronuz/base-x.svg?style=social&label=Watchers +[tweet-img]: https://img.shields.io/twitter/url/https/github.com/Kronuz/base-x.svg?style=social +[tweet-url]: https://twitter.com/intent/tweet?text=Base-X+encoding%2Fdecoding+for+modern+C%2B%2B+by+%40germbravo:&url=https%3A%2F%2Fgithub.com%2FKronuz%2Fbase-x +[github-follow-url]: https://github.com/Kronuz +[github-follow-img]: https://img.shields.io/github/followers/Kronuz.svg?style=social&label=Follow +[twitter-follow-url]: https://twitter.com/intent/follow?screen_name=germbravo +[twitter-follow-img]: https://img.shields.io/twitter/follow/germbravo.svg?style=social&label=Follow diff --git a/contrib/base-x/base_x.hh b/contrib/base-x/base_x.hh new file mode 100644 index 00000000000..fdc06fead2f --- /dev/null +++ b/contrib/base-x/base_x.hh @@ -0,0 +1,614 @@ +/* +base_x.hh +BaseX encoder / decoder for C++ + +Copyright (c) 2017 German Mendez Bravo (Kronuz) @ german dot mb at gmail.com + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject 
to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#ifndef __BASE_X__H_ +#define __BASE_X__H_ + +#include // for std::find_if, std::reverse +#include // for std::invalid_argument +#include // for std::string +#include // for std::enable_if_t + +#include "uinteger_t.hh" + + +class BaseX { + char _chr[256]; + int _ord[256]; + + const int size; + const int alphabet_base; + const unsigned base_size; + const unsigned alphabet_base_bits; + const unsigned block_size; + const uinteger_t::digit alphabet_base_mask; + const unsigned padding_size; + const char padding; + const int flags; + + constexpr char chr(unsigned char ord) const { + return _chr[ord]; + } + + constexpr int ord(unsigned char chr) const { + return _ord[chr]; + } + +public: + static constexpr int ignore_case = (1 << 0); + static constexpr int with_checksum = (1 << 1); + static constexpr int with_check = (1 << 2); + static constexpr int block_padding = (1 << 3); + + template + constexpr BaseX(int flgs, const char (&alphabet)[alphabet_size1], const char (&extended)[extended_size1], const char (&padding_string)[padding_size1], const char (&translate)[translate_size1]) : + _chr(), + _ord(), + size(alphabet_size1 - 1 + extended_size1 - 1), + alphabet_base(alphabet_size1 - 1), + base_size(uinteger_t::base_size(alphabet_base)), + alphabet_base_bits(uinteger_t::base_bits(alphabet_base)), + block_size((flgs & BaseX::block_padding) ? 
alphabet_base_bits : 0), + alphabet_base_mask(alphabet_base - 1), + padding_size(padding_size1 - 1), + padding(padding_size ? padding_string[0] : '\0'), + flags(flgs) + { + for (int c = 0; c < 256; ++c) { + _chr[c] = 0; + _ord[c] = alphabet_base; + } + for (int cp = 0; cp < alphabet_base; ++cp) { + auto ch = alphabet[cp]; + _chr[cp] = ch; + ASSERT(_ord[(unsigned char)ch] == alphabet_base); // Duplicate character in the alphabet + _ord[(unsigned char)ch] = cp; + if (flags & BaseX::ignore_case) { + if (ch >= 'A' && ch <='Z') { + _ord[(unsigned char)ch - 'A' + 'a'] = cp; + } else if (ch >= 'a' && ch <='z') { + _ord[(unsigned char)ch - 'a' + 'A'] = cp; + } + } + } + for (std::size_t i = 0; i < extended_size1 - 1; ++i) { + auto ch = extended[i]; + auto cp = alphabet_base + i; + _chr[cp] = ch; + ASSERT(_ord[(unsigned char)ch] == alphabet_base); // Duplicate character in the extended alphabet + _ord[(unsigned char)ch] = cp; + if (flags & BaseX::ignore_case) { + if (ch >= 'A' && ch <='Z') { + _ord[(unsigned char)ch - 'A' + 'a'] = cp; + } else if (ch >= 'a' && ch <='z') { + _ord[(unsigned char)ch - 'a' + 'A'] = cp; + } + } + } + int cp = -1; + for (std::size_t i = 0; i < translate_size1 - 1; ++i) { + auto ch = translate[i]; + auto ncp = _ord[(unsigned char)ch]; + if (ncp >= alphabet_base) { + ASSERT(_ord[(unsigned char)ch] == alphabet_base); // Invalid translation character + _ord[(unsigned char)ch] = cp; + if (flags & BaseX::ignore_case) { + if (ch >= 'A' && ch <='Z') { + _ord[(unsigned char)ch - 'A' + 'a'] = cp; + } else if (ch >= 'a' && ch <='z') { + _ord[(unsigned char)ch - 'a' + 'A'] = cp; + } + } + } else { + cp = ncp; + } + } + } + + // Get string representation of value + template ::value>> + void encode(Result& result, const uinteger_t& input) const { + std::size_t bp = 0; + uinteger_t quotient; + if (block_size) { + bp = ((input.bits() + 7) & 0xf8) % block_size; + bp = bp ? 
(block_size - bp) % block_size : 0; + if (bp) { + quotient = input << bp; + } + } + const uinteger_t& num = bp ? quotient : input; + auto num_sz = num.size(); + if (num_sz) { + int sum = 0; + result.reserve(num_sz * base_size); + if (alphabet_base_bits) { + std::size_t shift = 0; + auto ptr = reinterpret_cast(num.data()); + uinteger_t::digit v = *ptr++; + v <<= uinteger_t::half_digit_bits; + for (auto i = num_sz * 2 - 1; i; --i) { + v >>= uinteger_t::half_digit_bits; + v |= (static_cast(*ptr++) << uinteger_t::half_digit_bits); + do { + auto d = static_cast((v >> shift) & alphabet_base_mask); + result.push_back(chr(d)); + shift += alphabet_base_bits; + sum += d; + } while (shift <= uinteger_t::half_digit_bits); + shift -= uinteger_t::half_digit_bits; + } + v >>= (shift + uinteger_t::half_digit_bits); + while (v) { + auto d = static_cast(v & alphabet_base_mask); + result.push_back(chr(d)); + v >>= alphabet_base_bits; + sum += d; + } + auto s = chr(0); + auto rit_f = std::find_if(result.rbegin(), result.rend(), [s](const char& c) { return c != s; }); + result.resize(result.rend() - rit_f); // shrink + } else { + uinteger_t uint_base = alphabet_base; + if (!bp) { + quotient = num; + } + do { + auto r = quotient.divmod(uint_base); + auto d = static_cast(r.second); + result.push_back(chr(d)); + quotient = std::move(r.first); + sum += d; + } while (quotient); + } + std::reverse(result.begin(), result.end()); + if (padding_size) { + Result p; + p.resize((padding_size - (result.size() % padding_size)) % padding_size, padding); + result.append(p); + } + if (flags & BaseX::with_check) { + auto chk = static_cast(num % size); + result.push_back(chr(chk)); + sum += chk; + } + if (flags & BaseX::with_checksum) { + auto sz = result.size(); + sz = (sz + sz / size) % size; + sum += sz; + sum = (size - sum % size) % size; + result.push_back(chr(sum)); + } + } else { + result.push_back(chr(0)); + } + } + + template ::value>> + Result encode(const uinteger_t& num) const { + Result 
result; + encode(result, num); + return result; + } + + template ::value>> + void encode(Result& result, const unsigned char* decoded, std::size_t decoded_size) const { + encode(result, uinteger_t(decoded, decoded_size, 256)); + } + + template ::value>> + Result encode(const unsigned char* decoded, std::size_t decoded_size) const { + Result result; + encode(result, uinteger_t(decoded, decoded_size, 256)); + return result; + } + + template ::value>> + void encode(Result& result, const char* decoded, std::size_t decoded_size) const { + encode(result, uinteger_t(decoded, decoded_size, 256)); + } + + template ::value>> + Result encode(const char* decoded, std::size_t decoded_size) const { + Result result; + encode(result, uinteger_t(decoded, decoded_size, 256)); + return result; + } + + template ::value>> + void encode(Result& result, T (&s)[N]) const { + encode(result, s, N - 1); + } + + template ::value>> + Result encode(T (&s)[N]) const { + Result result; + encode(result, s, N - 1); + return result; + } + + template ::value>> + void encode(Result& result, const std::string& binary) const { + return encode(result, binary.data(), binary.size()); + } + + template ::value>> + Result encode(const std::string& binary) const { + Result result; + encode(result, binary.data(), binary.size()); + return result; + } + + void decode(uinteger_t& result, const char* encoded, std::size_t encoded_size) const { + result = 0; + int sum = 0; + int sumsz = 0; + int direction = 1; + + auto sz = encoded_size; + if (flags & BaseX::with_checksum) --sz; + if (flags & BaseX::with_check) --sz; + + int bp = 0; + + if (alphabet_base_bits) { + for (; sz; --sz, encoded += direction) { + auto c = *encoded; + if (c == padding) break; + auto d = ord(static_cast(c)); + if (d < 0) continue; // ignored character + if (d >= alphabet_base) { + throw std::invalid_argument("Error: Invalid character: '" + std::string(1, c) + "' at " + std::to_string(encoded_size - sz)); + } + sum += d; + ++sumsz; + result = 
(result << alphabet_base_bits) | d; + bp += block_size; + } + } else { + uinteger_t uint_base = alphabet_base; + for (; sz; --sz, encoded += direction) { + auto c = *encoded; + if (c == padding) break; + auto d = ord(static_cast(c)); + if (d < 0) continue; // ignored character + if (d >= alphabet_base) { + throw std::invalid_argument("Error: Invalid character: '" + std::string(1, c) + "' at " + std::to_string(encoded_size - sz)); + } + sum += d; + ++sumsz; + result = (result * uint_base) + d; + bp += block_size; + } + } + + for (; sz && *encoded == padding; --sz, ++encoded); + + result >>= (bp & 7); + + if (flags & BaseX::with_check) { + auto c = *encoded; + auto d = ord(static_cast(c)); + if (d < 0 || d >= size) { + throw std::invalid_argument("Error: Invalid character: '" + std::string(1, c) + "' at " + std::to_string(encoded_size - sz)); + } + auto chk = static_cast(result % size); + if (d != chk) { + throw std::invalid_argument("Error: Invalid check"); + } + sum += chk; + ++sumsz; + ++encoded; + } + + if (flags & BaseX::with_checksum) { + auto c = *encoded; + auto d = ord(static_cast(c)); + if (d < 0 || d >= size) { + throw std::invalid_argument("Error: Invalid character: '" + std::string(1, c) + "' at " + std::to_string(encoded_size - sz)); + } + sum += d; + sum += (sumsz + sumsz / size) % size; + if (sum % size) { + throw std::invalid_argument("Error: Invalid checksum"); + } + } + } + + template ::value>> + void decode(Result& result, const char* encoded, std::size_t encoded_size) const { + uinteger_t num; + decode(num, encoded, encoded_size); + result = num.template str(256); + } + + template ::value or std::is_integral::value>> + Result decode(const char* encoded, std::size_t encoded_size) const { + Result result; + decode(result, encoded, encoded_size); + return result; + } + + template ::value or std::is_integral::value>> + void decode(Result& result, T (&s)[N]) const { + decode(result, s, N - 1); + } + + template ::value or std::is_integral::value>> + 
Result decode(T (&s)[N]) const { + Result result; + decode(result, s, N - 1); + return result; + } + + template ::value or std::is_integral::value>> + void decode(Result& result, const std::string& encoded) const { + decode(result, encoded.data(), encoded.size()); + } + + template ::value or std::is_integral::value>> + Result decode(const std::string& encoded) const { + Result result; + decode(result, encoded.data(), encoded.size()); + return result; + } + + bool is_valid(const char* encoded, std::size_t encoded_size) const { + int sum = 0; + int sumsz = 0; + if (flags & BaseX::with_checksum) --sumsz; + for (; encoded_size; --encoded_size, ++encoded) { + auto d = ord(static_cast(*encoded)); + if (d < 0) continue; // ignored character + if (d >= alphabet_base) { + return false; + } + sum += d; + ++sumsz; + } + if (flags & BaseX::with_checksum) { + sum += (sumsz + sumsz / size) % size; + if (sum % size) { + return false; + } + } + return true; + } + + template + bool is_valid(T (&s)[N]) const { + return is_valid(s, N - 1); + } + + bool is_valid(const std::string& encoded) const { + return is_valid(encoded.data(), encoded.size()); + } +}; + +// base2 +struct Base2 { + static const BaseX& base2() { + static constexpr BaseX encoder(0, "01", "", "", ""); + return encoder; + } + static const BaseX& base2chk() { + static constexpr BaseX encoder(BaseX::with_checksum, "01", "", "", ""); + return encoder; + } +}; + +// base8 +struct Base8 { + static const BaseX& base8() { + static constexpr BaseX encoder(0, "01234567", "", "", ""); + return encoder; + } + static const BaseX& base8chk() { + static constexpr BaseX encoder(BaseX::with_checksum, "01234567", "", "", ""); + return encoder; + } +}; + +// base11 +struct Base11 { + static const BaseX& base11() { + static constexpr BaseX encoder(BaseX::ignore_case, "0123456789a", "", "", ""); + return encoder; + } + static const BaseX& base11chk() { + static constexpr BaseX encoder(BaseX::ignore_case | BaseX::with_checksum, 
"0123456789a", "", "", ""); + return encoder; + } +}; + +// base16 +struct Base16 { + static const BaseX& base16() { + static constexpr BaseX encoder(BaseX::ignore_case, "0123456789abcdef", "", "", ""); + return encoder; + } + static const BaseX& base16chk() { + static constexpr BaseX encoder(BaseX::ignore_case | BaseX::with_checksum, "0123456789abcdef", "", "", ""); + return encoder; + } + static const BaseX& rfc4648() { + static constexpr BaseX encoder(0, "0123456789ABCDEF", "", "", ""); + return encoder; + } +}; + +// base32 +struct Base32 { + static const BaseX& base32() { + static constexpr BaseX encoder(BaseX::ignore_case, "0123456789abcdefghijklmnopqrstuv", "", "", ""); + return encoder; + } + static const BaseX& base32chk() { + static constexpr BaseX encoder(BaseX::ignore_case | BaseX::with_checksum, "0123456789abcdefghijklmnopqrstuv", "", "", ""); + return encoder; + } + static const BaseX& crockford() { + static constexpr BaseX encoder(BaseX::ignore_case, "0123456789ABCDEFGHJKMNPQRSTVWXYZ", "", "", "-0O1IL"); + return encoder; + } + static const BaseX& crockfordchk() { + static constexpr BaseX encoder(BaseX::ignore_case | BaseX::with_check, "0123456789ABCDEFGHJKMNPQRSTVWXYZ", "*~$=U", "", "-0O1IL"); + return encoder; + } + static const BaseX& rfc4648() { + static constexpr BaseX encoder(BaseX::block_padding, "ABCDEFGHIJKLMNOPQRSTUVWXYZ234567", "", "========", "\n\r"); + return encoder; + } + static const BaseX& rfc4648hex() { + static constexpr BaseX encoder(BaseX::block_padding, "0123456789ABCDEFGHIJKLMNOPQRSTUV", "", "========", "\n\r"); + return encoder; + } +}; + +// base36 +struct Base36 { + static const BaseX& base36() { + static constexpr BaseX encoder(BaseX::ignore_case, "0123456789abcdefghijklmnopqrstuvwxyz", "", "", ""); + return encoder; + } + static const BaseX& base36chk() { + static constexpr BaseX encoder(BaseX::ignore_case | BaseX::with_checksum, "0123456789abcdefghijklmnopqrstuvwxyz", "", "", ""); + return encoder; + } +}; + +// base58 
+struct Base58 { + static const BaseX& base58() { + static constexpr BaseX encoder(0, "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuv", "", "", ""); + return encoder; + } + static const BaseX& base58chk() { + static constexpr BaseX encoder(BaseX::with_checksum, "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuv", "", "", ""); + return encoder; + } + static const BaseX& bitcoin() { + static constexpr BaseX encoder(0, "123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz", "", "", ""); + return encoder; + } + static const BaseX& bitcoinchk() { + static constexpr BaseX encoder(BaseX::with_checksum, "123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz", "", "", ""); + return encoder; + } + static const BaseX& ripple() { + static constexpr BaseX encoder(0, "rpshnaf39wBUDNEGHJKLM4PQRST7VWXYZ2bcdeCg65jkm8oFqi1tuvAxyz", "", "", ""); + return encoder; + } + static const BaseX& ripplechk() { + static constexpr BaseX encoder(BaseX::with_checksum, "rpshnaf39wBUDNEGHJKLM4PQRST7VWXYZ2bcdeCg65jkm8oFqi1tuvAxyz", "", "", ""); + return encoder; + } + static const BaseX& flickr() { + static constexpr BaseX encoder(0, "123456789abcdefghijkmnopqrstuvwxyzABCDEFGHJKLMNPQRSTUVWXYZ", "", "", ""); + return encoder; + } + static const BaseX& flickrchk() { + static constexpr BaseX encoder(BaseX::with_checksum, "123456789abcdefghijkmnopqrstuvwxyzABCDEFGHJKLMNPQRSTUVWXYZ", "", "", ""); + return encoder; + } +}; + +// base59 +struct Base59 { + static const BaseX& base59() { + static constexpr BaseX encoder(0, "23456789abcdefghijklmnopqrstuvwxyzABCDEFGHJKLMNOPQRSTUVWXYZ", "", "", "l1IO0"); + return encoder; + } + static const BaseX& base59chk() { + static constexpr BaseX encoder(BaseX::with_checksum, "23456789abcdefghijklmnopqrstuvwxyzABCDEFGHJKLMNOPQRSTUVWXYZ", "", "", "l1IO0"); + return encoder; + } + static const BaseX& dubaluchk() { + static constexpr BaseX encoder(BaseX::with_checksum, "zy9MalDxwpKLdvW2AtmscgbYUq6jhP7E53TiXenZRkVCrouBH4GSQf8FNJO", "", "", 
"-l1IO0"); + return encoder; + } +}; + +// base62 +struct Base62 { + static const BaseX& base62() { + static constexpr BaseX encoder(0, "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz", "", "", ""); + return encoder; + } + static const BaseX& base62chk() { + static constexpr BaseX encoder(BaseX::with_checksum, "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz", "", "", ""); + return encoder; + } + static const BaseX& inverted() { + static constexpr BaseX encoder(0, "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", "", "", ""); + return encoder; + } + static const BaseX& invertedchk() { + static constexpr BaseX encoder(BaseX::with_checksum, "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", "", "", ""); + return encoder; + } +}; + +// base64 +struct Base64 { + static const BaseX& base64() { + static constexpr BaseX encoder(0, "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/", "", "", ""); + return encoder; + } + static const BaseX& base64chk() { + static constexpr BaseX encoder(BaseX::with_checksum, "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/", "", "", ""); + return encoder; + } + static const BaseX& url() { + static constexpr BaseX encoder(0, "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_", "", "", ""); + return encoder; + } + static const BaseX& urlchk() { + static constexpr BaseX encoder(BaseX::with_checksum, "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_", "", "", ""); + return encoder; + } + static const BaseX& rfc4648() { + static constexpr BaseX encoder(BaseX::block_padding, "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/", "", "====", "\n\r"); + return encoder; + } + static const BaseX& rfc4648url() { + static constexpr BaseX encoder(BaseX::block_padding, "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_", "", "====", "\n\r"); + return encoder; + } +}; + +// base66 +struct Base66 { + 
static const BaseX& base66() { + static constexpr BaseX encoder(0, "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_.!~", "", "", ""); + return encoder; + } + static const BaseX& base66chk() { + static constexpr BaseX encoder(BaseX::with_checksum, "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_.!~", "", "", ""); + return encoder; + } +}; + +#endif diff --git a/contrib/base-x/tests/test.cc b/contrib/base-x/tests/test.cc new file mode 100644 index 00000000000..d47d211173e --- /dev/null +++ b/contrib/base-x/tests/test.cc @@ -0,0 +1,30 @@ +/* +The MIT License (MIT) + +Copyright (c) 2017 German Mendez Bravo (Kronuz) @ german dot mb at gmail.com + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include + +int main(int argc, char * argv[]){ + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} \ No newline at end of file diff --git a/contrib/base-x/tests/testcases/tests.cc b/contrib/base-x/tests/testcases/tests.cc new file mode 100644 index 00000000000..c5bebfc8288 --- /dev/null +++ b/contrib/base-x/tests/testcases/tests.cc @@ -0,0 +1,359 @@ +/* +The MIT License (MIT) + +Copyright (c) 2017 German Mendez Bravo (Kronuz) @ german dot mb at gmail.com + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include + +#include "base_x.hh" + + +static constexpr BaseX test_base2(0, "01", "", "", ""); +static constexpr BaseX test_base16(0, "0123456789abcdef", "", "", ""); +static constexpr BaseX test_base58(0, "123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz", "", "", ""); + + +TEST(UUID, Encode) { + EXPECT_EQ(Base62::base62().encode("\330\105\140\310\23\117\21\346\241\342\64\66\73\322\155\256"), "6a630O1jrtMjCrQDyG3D3O"); + EXPECT_EQ(Base58::bitcoin().encode("\330\105\140\310\23\117\21\346\241\342\64\66\73\322\155\256"), "ThxCy1Ek2q6UhWQhj9CK1o"); + EXPECT_EQ(Base58::base58().encode("\330\105\140\310\23\117\21\346\241\342\64\66\73\322\155\256"), "QetBu0Dh1m5ReTNeg8BI0k"); +} + +TEST(BaseX, checksums) { + EXPECT_EQ(Base64::base64().encode("Hello world!"), "SGVsbG8gd29ybGQh"); + EXPECT_EQ(Base64::base64chk().encode("Hello world!"), "SGVsbG8gd29ybGQhG"); + + EXPECT_EQ(Base64::base64().decode("SGVsbG8gd29ybGQh"), "Hello world!"); + EXPECT_EQ(Base64::base64chk().decode("SGVsbG8gd29ybGQhG"), "Hello world!"); + + EXPECT_EQ(Base62::base62().encode("Hello world!"), "T8dgcjRGuYUueWht"); + EXPECT_EQ(Base62::base62chk().encode("Hello world!"), "T8dgcjRGuYUueWhtE"); + + EXPECT_EQ(Base62::base62().decode("T8dgcjRGuYUueWht"), "Hello world!"); + EXPECT_EQ(Base62::base62chk().decode("T8dgcjRGuYUueWhtE"), "Hello world!"); + + EXPECT_EQ(Base62::base62chk().is_valid("T8dgcjRGuYUueWhtE"), true); + EXPECT_EQ(Base62::base62chk().is_valid("Some random text!"), false); +} + +TEST(base16, Encoder) { + EXPECT_EQ(Base16::base16().encode("A"), "41"); + EXPECT_EQ(Base16::base16().encode("AB"), "4142"); + EXPECT_EQ(Base16::base16().encode("ABC"), "414243"); + EXPECT_EQ(Base16::base16().encode("ABCD"), "41424344"); + EXPECT_EQ(Base16::base16().encode("ABCDE"), "4142434445"); + EXPECT_EQ(Base16::base16().encode("ABCDEF"), "414243444546"); + + EXPECT_EQ(Base16::rfc4648().encode("A"), "41"); + EXPECT_EQ(Base16::rfc4648().encode("AB"), "4142"); + 
EXPECT_EQ(Base16::rfc4648().encode("ABC"), "414243"); + EXPECT_EQ(Base16::rfc4648().encode("ABCD"), "41424344"); + EXPECT_EQ(Base16::rfc4648().encode("ABCDE"), "4142434445"); + EXPECT_EQ(Base16::rfc4648().encode("ABCDEF"), "414243444546"); +} + +TEST(base16, Decoder) { + EXPECT_EQ(Base16::base16().decode("41"), "A"); + EXPECT_EQ(Base16::base16().decode("4142"), "AB"); + EXPECT_EQ(Base16::base16().decode("414243"), "ABC"); + EXPECT_EQ(Base16::base16().decode("41424344"), "ABCD"); + EXPECT_EQ(Base16::base16().decode("4142434445"), "ABCDE"); + EXPECT_EQ(Base16::base16().decode("414243444546"), "ABCDEF"); + + EXPECT_EQ(Base16::rfc4648().decode("41"), "A"); + EXPECT_EQ(Base16::rfc4648().decode("4142"), "AB"); + EXPECT_EQ(Base16::rfc4648().decode("414243"), "ABC"); + EXPECT_EQ(Base16::rfc4648().decode("41424344"), "ABCD"); + EXPECT_EQ(Base16::rfc4648().decode("4142434445"), "ABCDE"); + EXPECT_EQ(Base16::rfc4648().decode("414243444546"), "ABCDEF"); +} + +TEST(base32, Encoder) { + // Note base64() encoding is NOT the same as the standard (rfc4648) + EXPECT_EQ(Base32::base32().encode("A"), "21"); + EXPECT_EQ(Base32::base32().encode("AB"), "ga2"); + EXPECT_EQ(Base32::base32().encode("ABC"), "42gi3"); + EXPECT_EQ(Base32::base32().encode("ABCD"), "10k4gq4"); + EXPECT_EQ(Base32::base32().encode("ABCDE"), "85146h25"); + EXPECT_EQ(Base32::base32().encode("ABCDEF"), "21891k8ha6"); + EXPECT_EQ(Base32::base32().encode("ABCDEFGHIJKLMNOPQRSTUVWXYZ"), "21891k8ha68t44iiib9h6ksjqga5956l2lapblgmaq"); + + EXPECT_EQ(Base32::rfc4648().encode("A"), "IE======"); + EXPECT_EQ(Base32::rfc4648().encode("AB"), "IFBA===="); + EXPECT_EQ(Base32::rfc4648().encode("ABC"), "IFBEG==="); + EXPECT_EQ(Base32::rfc4648().encode("ABCD"), "IFBEGRA="); + EXPECT_EQ(Base32::rfc4648().encode("ABCDE"), "IFBEGRCF"); + EXPECT_EQ(Base32::rfc4648().encode("ABCDEF"), "IFBEGRCFIY======"); + EXPECT_EQ(Base32::rfc4648().encode("ABCDEFGHIJKLMNOPQRSTUVWXYZ"), "IFBEGRCFIZDUQSKKJNGE2TSPKBIVEU2UKVLFOWCZLI======"); + + 
EXPECT_EQ(Base32::crockford().encode(519571), "FVCK"); + EXPECT_EQ(Base32::crockfordchk().encode(1234), "16JD"); + EXPECT_EQ(Base32::crockfordchk().encode("Hello World"), "28CNP6RVS0AXQQ4V348"); +} + +TEST(base32, Decoder) { + // Note base64() encoding is NOT the same as the standard (rfc4648) + EXPECT_EQ(Base32::base32().decode("21"), "A"); + EXPECT_EQ(Base32::base32().decode("ga2"), "AB"); + EXPECT_EQ(Base32::base32().decode("42gi3"), "ABC"); + EXPECT_EQ(Base32::base32().decode("10k4gq4"), "ABCD"); + EXPECT_EQ(Base32::base32().decode("85146h25"), "ABCDE"); + EXPECT_EQ(Base32::base32().decode("21891k8ha6"), "ABCDEF"); + EXPECT_EQ(Base32::base32().decode("21891k8ha68t44iiib9h6ksjqga5956l2lapblgmaq"), "ABCDEFGHIJKLMNOPQRSTUVWXYZ"); + + EXPECT_EQ(Base32::rfc4648().decode("IE======"), "A"); + EXPECT_EQ(Base32::rfc4648().decode("IFBA===="), "AB"); + EXPECT_EQ(Base32::rfc4648().decode("IFBEG==="), "ABC"); + EXPECT_EQ(Base32::rfc4648().decode("IFBEGRA="), "ABCD"); + EXPECT_EQ(Base32::rfc4648().decode("IFBEGRCF"), "ABCDE"); + EXPECT_EQ(Base32::rfc4648().decode("IFBEGRCFIY======"), "ABCDEF"); + EXPECT_EQ(Base32::rfc4648().decode("IFBEGRCFIZDUQSKKJNGE2TSPKBIVEU2UKVLFOWCZLI======"), "ABCDEFGHIJKLMNOPQRSTUVWXYZ"); + + EXPECT_EQ(Base32::crockford().decode("FVCK"), 519571); + EXPECT_EQ(Base32::crockfordchk().is_valid("16JD"), true); + EXPECT_EQ(Base32::crockfordchk().decode("16JD"), 1234); + + EXPECT_EQ(Base32::crockfordchk().decode("2-8cn-p6r-vso-axq-q4v-348"), "Hello World"); +} + +TEST(base58, Encoder) { + EXPECT_EQ(Base58::base58().decode("1TFvCj"), 987654321); + EXPECT_EQ(Base58::base58().encode(987654321), "1TFvCj"); + EXPECT_EQ(Base58::base58().encode("Hello world!"), "1LDlk6QWOejX6rPrJ"); + EXPECT_EQ(Base58::bitcoin().encode("Hello world!"), "2NEpo7TZRhna7vSvL"); +} + +TEST(base62, Encoder) { + EXPECT_EQ(Base62::base62().decode("14q60P"), 987654321); + EXPECT_EQ(Base62::base62().encode(987654321), "14q60P"); + EXPECT_EQ(Base62::base62().encode("Hello world!"), 
"T8dgcjRGuYUueWht"); + EXPECT_EQ(Base62::inverted().encode("Hello world!"), "t8DGCJrgUyuUEwHT"); +} + +TEST(base64, Encoder) { + // Note Base64 encoding is NOT the same as the standard (rfc4648) + EXPECT_EQ(Base64::base64().encode("A"), "BB"); + EXPECT_EQ(Base64::base64().encode("AB"), "EFC"); + EXPECT_EQ(Base64::base64().encode("ABC"), "QUJD"); + EXPECT_EQ(Base64::base64().encode("ABCD"), "BBQkNE"); + EXPECT_EQ(Base64::base64().encode("ABCDE"), "EFCQ0RF"); + EXPECT_EQ(Base64::base64().encode("ABCDEF"), "QUJDREVG"); + EXPECT_EQ(Base64::base64().encode("ABCDEFGHIJKLMNOPQRSTUVWXYZ"), "EFCQ0RFRkdISUpLTE1OT1BRUlNUVVZXWFla"); + + EXPECT_EQ(Base64::rfc4648().encode("A"), "QQ=="); + EXPECT_EQ(Base64::rfc4648().encode("AB"), "QUI="); + EXPECT_EQ(Base64::rfc4648().encode("ABC"), "QUJD"); + EXPECT_EQ(Base64::rfc4648().encode("ABCD"), "QUJDRA=="); + EXPECT_EQ(Base64::rfc4648().encode("ABCDE"), "QUJDREU="); + EXPECT_EQ(Base64::rfc4648().encode("ABCDEF"), "QUJDREVG"); + EXPECT_EQ(Base64::rfc4648().encode("ABCDEFGHIJKLMNOPQRSTUVWXYZ"), "QUJDREVGR0hJSktMTU5PUFFSU1RVVldYWVo="); +} + +TEST(base64, Decoder) { + // Note Base64 encoding is NOT the same as the standard (rfc4648) + EXPECT_EQ(Base64::base64().decode("BB"), "A"); + EXPECT_EQ(Base64::base64().decode("EFC"), "AB"); + EXPECT_EQ(Base64::base64().decode("QUJD"), "ABC"); + EXPECT_EQ(Base64::base64().decode("BBQkNE"), "ABCD"); + EXPECT_EQ(Base64::base64().decode("EFCQ0RF"), "ABCDE"); + EXPECT_EQ(Base64::base64().decode("QUJDREVG"), "ABCDEF"); + EXPECT_EQ(Base64::base64().decode("EFCQ0RFRkdISUpLTE1OT1BRUlNUVVZXWFla"), "ABCDEFGHIJKLMNOPQRSTUVWXYZ"); + + EXPECT_EQ(Base64::rfc4648().decode("QQ=="), "A"); + EXPECT_EQ(Base64::rfc4648().decode("QUI="), "AB"); + EXPECT_EQ(Base64::rfc4648().decode("QUJD"), "ABC"); + EXPECT_EQ(Base64::rfc4648().decode("QUJDRA=="), "ABCD"); + EXPECT_EQ(Base64::rfc4648().decode("QUJDREU="), "ABCDE"); + EXPECT_EQ(Base64::rfc4648().decode("QUJDREVG"), "ABCDEF"); + 
EXPECT_EQ(Base64::rfc4648().decode("QUJDREVG\nR0hJSktM\nTU5PUFFS\nU1RVVldY\nWVo="), "ABCDEFGHIJKLMNOPQRSTUVWXYZ"); +} + +TEST(base58, ShouldEncodeAndDecodeIntegers) { + auto data = 987654321; + + auto gmpEncoded = Base58::base58().encode(data); + auto bitcoinEncoded = Base58::bitcoin().encode(data); + auto rippleEncoded = Base58::ripple().encode(data); + auto flickrEncoded = Base58::flickr().encode(data); + + EXPECT_EQ(gmpEncoded, "1TFvCj"); + EXPECT_EQ(bitcoinEncoded, "2WGzDn"); + EXPECT_EQ(rippleEncoded, "pWGzD8"); + EXPECT_EQ(flickrEncoded, "2vgZdM"); + + auto gmpDecoded = Base58::base58().decode(gmpEncoded); + auto bitcoinDecoded = Base58::bitcoin().decode(bitcoinEncoded); + auto rippleDecoded = Base58::ripple().decode(rippleEncoded); + auto flickrDecoded = Base58::flickr().decode(flickrEncoded); + + EXPECT_EQ(gmpDecoded, data); + EXPECT_EQ(bitcoinDecoded, data); + EXPECT_EQ(rippleDecoded, data); + EXPECT_EQ(flickrDecoded, data); + + auto encoded = Base58::base58().encode(data); + auto decoded = Base58::base58().decode(encoded); + + EXPECT_EQ(decoded, data); +} + +TEST(base58, LongText) { + auto data = "Lorem ipsum dolor consectetur."; + + auto gmpEncoded = Base58::base58().encode(data); + auto bitcoinEncoded = Base58::bitcoin().encode(data); + auto rippleEncoded = Base58::ripple().encode(data); + auto flickrEncoded = Base58::flickr().encode(data); + + EXPECT_EQ(gmpEncoded, "FIHZQEpJ739QdqChX1PkgTBqP1FaDgJWQiGvY92YA"); + EXPECT_EQ(bitcoinEncoded, "GKJcTFtL84ATguDka2SojWCuS2GdEjLZTmHzbA3bB"); + EXPECT_EQ(rippleEncoded, "GKJcTEtL3hwTguDk2pSojWUuSpGdNjLZTmHzbwsbB"); + EXPECT_EQ(flickrEncoded, "gjiBsfTk84asFUdKz2rNJvcUr2gCeJkysLhZAa3Ab"); + + auto gmpDecoded = Base58::base58().decode(gmpEncoded); + auto bitcoinDecoded = Base58::bitcoin().decode(bitcoinEncoded); + auto rippleDecoded = Base58::ripple().decode(rippleEncoded); + auto flickrDecoded = Base58::flickr().decode(flickrEncoded); + + EXPECT_EQ(gmpDecoded, data); + EXPECT_EQ(bitcoinDecoded, data); + 
EXPECT_EQ(rippleDecoded, data); + EXPECT_EQ(flickrDecoded, data); +} + +TEST(base58, Tests) { + EXPECT_EQ(test_base2.encode(uinteger_t("000f", 16)), "1111"); + // EXPECT_EQ(test_base2.encode(uinteger_t("00ff", 16)), "011111111"); // ->> + EXPECT_EQ(test_base2.encode(uinteger_t("00ff", 16)), "11111111"); + EXPECT_EQ(test_base2.encode(uinteger_t("0fff", 16)), "111111111111"); + EXPECT_EQ(test_base2.encode(uinteger_t("ff00ff00", 16)), "11111111000000001111111100000000"); + // EXPECT_EQ(test_base16.encode(uinteger_t("0000000f", 16)), "000f"); // ->> + EXPECT_EQ(test_base16.encode(uinteger_t("0000000f", 16)), "f"); + // EXPECT_EQ(test_base16.encode(uinteger_t("000fff", 16)), "0fff"); // ->> + EXPECT_EQ(test_base16.encode(uinteger_t("000fff", 16)), "fff"); + EXPECT_EQ(test_base16.encode(uinteger_t("ffff", 16)), "ffff"); + // EXPECT_EQ(test_base58.encode(uinteger_t("", 16)), ""); // ->> + EXPECT_EQ(test_base58.encode(uinteger_t("", 16)), "1"); + EXPECT_EQ(test_base58.encode(uinteger_t("61", 16)), "2g"); + EXPECT_EQ(test_base58.encode(uinteger_t("626262", 16)), "a3gV"); + EXPECT_EQ(test_base58.encode(uinteger_t("636363", 16)), "aPEr"); + EXPECT_EQ(test_base58.encode(uinteger_t("73696d706c792061206c6f6e6720737472696e67", 16)), "2cFupjhnEsSn59qHXstmK2ffpLv2"); + // EXPECT_EQ(test_base58.encode(uinteger_t("00eb15231dfceb60925886b67d065299925915aeb172c06647", 16)), "1NS17iag9jJgTHD1VXjvLCEnZuQ3rJDE9L"); // ->> + EXPECT_EQ(test_base58.encode(uinteger_t("00eb15231dfceb60925886b67d065299925915aeb172c06647", 16)), "NS17iag9jJgTHD1VXjvLCEnZuQ3rJDE9L"); + EXPECT_EQ(test_base58.encode(uinteger_t("516b6fcd0f", 16)), "ABnLTmg"); + EXPECT_EQ(test_base58.encode(uinteger_t("bf4f89001e670274dd", 16)), "3SEo3LWLoPntC"); + EXPECT_EQ(test_base58.encode(uinteger_t("572e4794", 16)), "3EFU7m"); + EXPECT_EQ(test_base58.encode(uinteger_t("ecac89cad93923c02321", 16)), "EJDM8drfXA6uyA"); + EXPECT_EQ(test_base58.encode(uinteger_t("10c8511e", 16)), "Rt5zm"); + // 
EXPECT_EQ(test_base58.encode(uinteger_t("00000000000000000000", 16)), "1111111111"); // ->> + EXPECT_EQ(test_base58.encode(uinteger_t("00000000000000000000", 16)), "1"); + EXPECT_EQ(test_base58.encode(uinteger_t("801184cd2cdd640ca42cfc3a091c51d549b2f016d454b2774019c2b2d2e08529fd206ec97e", 16)), "5Hx15HFGyep2CfPxsJKe2fXJsCVn5DEiyoeGGF6JZjGbTRnqfiD"); + // EXPECT_EQ(test_base58.encode(uinteger_t("003c176e659bea0f29a3e9bf7880c112b1b31b4dc826268187", 16)), "16UjcYNBG9GTK4uq2f7yYEbuifqCzoLMGS"); // ->> + EXPECT_EQ(test_base58.encode(uinteger_t("003c176e659bea0f29a3e9bf7880c112b1b31b4dc826268187", 16)), "6UjcYNBG9GTK4uq2f7yYEbuifqCzoLMGS"); + EXPECT_EQ(test_base58.encode(uinteger_t("ffffffffffffffffffff", 16)), "FPBt6CHo3fovdL"); + EXPECT_EQ(test_base58.encode(uinteger_t("ffffffffffffffffffffffffff", 16)), "NKioeUVktgzXLJ1B3t"); + EXPECT_EQ(test_base58.encode(uinteger_t("ffffffffffffffffffffffffffffffff", 16)), "YcVfxkQb6JRzqk5kF2tNLv"); + EXPECT_EQ(test_base2.encode(uinteger_t("fb6f9ac3", 16)), "11111011011011111001101011000011"); + EXPECT_EQ(test_base2.encode(uinteger_t("179eea7a", 16)), "10111100111101110101001111010"); + EXPECT_EQ(test_base2.encode(uinteger_t("6db825db", 16)), "1101101101110000010010111011011"); + EXPECT_EQ(test_base2.encode(uinteger_t("93976aa7", 16)), "10010011100101110110101010100111"); + EXPECT_EQ(test_base58.encode(uinteger_t("ef41b9ce7e830af7", 16)), "h26E62FyLQN"); + EXPECT_EQ(test_base58.encode(uinteger_t("606cbc791036d2e9", 16)), "H8Sa62HVULG"); + EXPECT_EQ(test_base58.encode(uinteger_t("bdcb0ea69c2c8ec8", 16)), "YkESUPpnfoD"); + EXPECT_EQ(test_base58.encode(uinteger_t("1a2358ba67fb71d5", 16)), "5NaBN89ajtQ"); + EXPECT_EQ(test_base58.encode(uinteger_t("e6173f0f4d5fb5d7", 16)), "fVAoezT1ZkS"); + EXPECT_EQ(test_base58.encode(uinteger_t("91c81cbfdd58bbd2", 16)), "RPGNSU3bqTX"); + EXPECT_EQ(test_base58.encode(uinteger_t("329e0bf0e388dbfe", 16)), "9U41ZkwwysT"); + EXPECT_EQ(test_base58.encode(uinteger_t("30b10393210fa65b", 16)), "99NMW3WHjjY"); + 
EXPECT_EQ(test_base58.encode(uinteger_t("ab3bdd18e3623654", 16)), "VeBbqBb4rCT"); + EXPECT_EQ(test_base58.encode(uinteger_t("fe29d1751ec4af8a", 16)), "jWhmYLN9dUm"); + EXPECT_EQ(test_base58.encode(uinteger_t("c1273ab5488769807d", 16)), "3Tbh4kL3WKW6g"); + EXPECT_EQ(test_base58.encode(uinteger_t("6c7907904de934f852", 16)), "2P5jNYhfpTJxy"); + EXPECT_EQ(test_base58.encode(uinteger_t("05f0be055db47a0dc9", 16)), "5PN768Kr5oEp"); + EXPECT_EQ(test_base58.encode(uinteger_t("3511e6206829b35b12", 16)), "gBREojGaJ6DF"); + EXPECT_EQ(test_base58.encode(uinteger_t("d1c7c2ddc4a459d503", 16)), "3fsekq5Esq2KC"); + EXPECT_EQ(test_base58.encode(uinteger_t("1f88efd17ab073e9a1", 16)), "QHJbmW9ZY7jn"); + EXPECT_EQ(test_base58.encode(uinteger_t("0f45dadf4e64c5d5c2", 16)), "CGyVUMmCKLRf"); + EXPECT_EQ(test_base58.encode(uinteger_t("de1e5c5f718bb7fafa", 16)), "3pyy8U7w3KUa5"); + EXPECT_EQ(test_base58.encode(uinteger_t("123190b93e9a49a46c", 16)), "ES3DeFrG1zbd"); + EXPECT_EQ(test_base58.encode(uinteger_t("8bee94a543e7242e5a", 16)), "2nJnuWyLpGf6y"); + EXPECT_EQ(test_base58.encode(uinteger_t("9fd5f2285362f5cfd834", 16)), "9yqFhqeewcW3pF"); + EXPECT_EQ(test_base58.encode(uinteger_t("6987bac63ad23828bb31", 16)), "6vskE5Y1LhS3U4"); + EXPECT_EQ(test_base58.encode(uinteger_t("19d4a0f9d459cc2a08b0", 16)), "2TAsHPuaLhh5Aw"); + EXPECT_EQ(test_base58.encode(uinteger_t("a1e47ffdbea5a807ab26", 16)), "A6XzPgSUJDf1W5"); + EXPECT_EQ(test_base58.encode(uinteger_t("35c231e5b3a86a9b83db", 16)), "42B8reRwPAAoAa"); + EXPECT_EQ(test_base58.encode(uinteger_t("b2351012a48b8347c351", 16)), "B1hPyomGx4Vhqa"); + EXPECT_EQ(test_base58.encode(uinteger_t("71d402694dd9517ea653", 16)), "7Pv2SyAQx2Upu8"); + EXPECT_EQ(test_base58.encode(uinteger_t("55227c0ec7955c2bd6e8", 16)), "5nR64BkskyjHMq"); + EXPECT_EQ(test_base58.encode(uinteger_t("17b3d8ee7907c1be34df", 16)), "2LEg7TxosoxTGS"); + EXPECT_EQ(test_base58.encode(uinteger_t("7e7bba7b68bb8e95827f", 16)), "879o2ATGnmYyAW"); + 
EXPECT_EQ(test_base58.encode(uinteger_t("db9c13f5ba7654b01407fb", 16)), "wTYfxjDVbiks874"); + EXPECT_EQ(test_base58.encode(uinteger_t("6186449d20f5fd1e6c4393", 16)), "RBeiWhzZNL6VtMG"); + EXPECT_EQ(test_base58.encode(uinteger_t("5248751cebf4ad1c1a83c3", 16)), "MQSVNnc8ehFCqtW"); + EXPECT_EQ(test_base58.encode(uinteger_t("32090ef18cd479fc376a74", 16)), "DQdu351ExDaeYeX"); + EXPECT_EQ(test_base58.encode(uinteger_t("7cfa5d6ed1e467d986c426", 16)), "XzW67T5qfEnFcaZ"); + EXPECT_EQ(test_base58.encode(uinteger_t("9d8707723c7ede51103b6d", 16)), "g4eTCg6QJnB1UU4"); + EXPECT_EQ(test_base58.encode(uinteger_t("6f4d1e392d6a9b4ed8b223", 16)), "Ubo7kZY5aDpAJp2"); + EXPECT_EQ(test_base58.encode(uinteger_t("38057d98797cd39f80a0c9", 16)), "EtjQ2feamJvuqse"); + EXPECT_EQ(test_base58.encode(uinteger_t("de7e59903177e20880e915", 16)), "xB2N7yRBnDYEoT2"); + EXPECT_EQ(test_base58.encode(uinteger_t("b2ea24a28bc4a60b5c4b8d", 16)), "mNFMpJ2P3TGYqhv"); + EXPECT_EQ(test_base58.encode(uinteger_t("cf84938958589b6ffba6114d", 16)), "4v8ZbsGh2ePz5sipt"); + EXPECT_EQ(test_base58.encode(uinteger_t("dee13be7b8d8a08c94a3c02a", 16)), "5CwmE9jQqwtHkTF45"); + EXPECT_EQ(test_base58.encode(uinteger_t("14cb9c6b3f8cd2e02710f569", 16)), "Pm85JHVAAdeUdxtp"); + EXPECT_EQ(test_base58.encode(uinteger_t("ca3f2d558266bdcc44c79cb5", 16)), "4pMwomBAQHuUnoLUC"); + EXPECT_EQ(test_base58.encode(uinteger_t("c031215be44cbad745f38982", 16)), "4dMeTrcxiVw9RWvj3"); + EXPECT_EQ(test_base58.encode(uinteger_t("1435ab1dbc403111946270a5", 16)), "P7wX3sCWNrbqhBEC"); + EXPECT_EQ(test_base58.encode(uinteger_t("d8c6e4d775e7a66a0d0f9f41", 16)), "56GLoRDGWGuGJJwPN"); + EXPECT_EQ(test_base58.encode(uinteger_t("dcee35e74f0fd74176fce2f4", 16)), "5Ap1zyuYiJJFwWcMR"); + EXPECT_EQ(test_base58.encode(uinteger_t("bfcc0ca4b4855d1cf8993fc0", 16)), "4cvafQW4PEhARKv9D"); + EXPECT_EQ(test_base58.encode(uinteger_t("e02a3ac25ece7b54584b670a", 16)), "5EMM28xkpxZ1kkVUM"); + EXPECT_EQ(test_base58.encode(uinteger_t("fe4d938fc3719f064cabb4bfff", 16)), 
"NBXKkbHwrAsiWTLAk6"); + EXPECT_EQ(test_base58.encode(uinteger_t("9289cb4f6b15c57e6086b87ea5", 16)), "DCvDpjEXEbHjZqskKv"); + EXPECT_EQ(test_base58.encode(uinteger_t("fc266f35626b3612bfe978537b", 16)), "N186PVoBWrNre35BGE"); + EXPECT_EQ(test_base58.encode(uinteger_t("33ff08c06d92502bf258c07166", 16)), "5LC4SoW6jmTtbkbePw"); + EXPECT_EQ(test_base58.encode(uinteger_t("6a81cac1f3666bc59dc67b1c3c", 16)), "9sXgUySUzwiqDU5WHy"); + EXPECT_EQ(test_base58.encode(uinteger_t("9dfb8e7e744c544c0f323ea729", 16)), "EACsmGmkgcwsrPFzLg"); + EXPECT_EQ(test_base58.encode(uinteger_t("1e7a1e284f70838b38442b682b", 16)), "3YEVk9bE7rw5qExMkv"); + EXPECT_EQ(test_base58.encode(uinteger_t("2a862ad57901a8235f5dc74eaf", 16)), "4YS259nuTLfeXa5Wuc"); + EXPECT_EQ(test_base58.encode(uinteger_t("74c82096baef21f9d3089e5462", 16)), "AjAcKEhUfrqm8smvM7"); + EXPECT_EQ(test_base58.encode(uinteger_t("7a3edbc23d7b600263920261cc", 16)), "BBZXyRgey5S5DDZkcK"); + EXPECT_EQ(test_base58.encode(uinteger_t("20435664c357d25a9c8df751cf4f", 16)), "CrwNL6Fbv4pbRx1zd9g"); + EXPECT_EQ(test_base58.encode(uinteger_t("51a7aa87cf5cb1c12d045ec3422d", 16)), "X27NHGgKXmGzzQvDtpC"); + EXPECT_EQ(test_base58.encode(uinteger_t("344d2e116aa26f1062a2cb6ebbef", 16)), "LEDLDvL1Hg4qt1efVXt"); + EXPECT_EQ(test_base58.encode(uinteger_t("6941add7be4c0b5c7163e4928f8e", 16)), "fhMyN6gwoxE3uYraVzV"); + EXPECT_EQ(test_base58.encode(uinteger_t("10938fcbb7c4ab991649734a14bf", 16)), "76TPrSDxzGQfSzMu974"); + EXPECT_EQ(test_base58.encode(uinteger_t("eafe04d944ba504e9af9117b07de", 16)), "2VPgov563ryfe4L2Bj6M"); + EXPECT_EQ(test_base58.encode(uinteger_t("58d0aeed4d35da20b6f052127edf", 16)), "ZenZhXF9YwP8nQvNtNz"); + EXPECT_EQ(test_base58.encode(uinteger_t("d734984e2f5aecf25f7a3e353f8a", 16)), "2N7n3jFsTdyN49Faoq6h"); + EXPECT_EQ(test_base58.encode(uinteger_t("57d873fdb405b7daf4bafa62068a", 16)), "ZJ7NwoP4wHvwyZg3Wjs"); + EXPECT_EQ(test_base58.encode(uinteger_t("bda4ec7b40d0d65ca95dec4c4d3b", 16)), "2CijxjsNyvqTwPCfDcpA"); + 
EXPECT_EQ(test_base58.encode(uinteger_t("826c4abdceb1b91f0d4ad665f86d2e", 16)), "4edfvuDQu9KzVxLuXHfMo"); + EXPECT_EQ(test_base58.encode(uinteger_t("e7ecb35d07e65b960cb10574a4f51a", 16)), "7VLRYdB4cToipp2J2p3v9"); + EXPECT_EQ(test_base58.encode(uinteger_t("4f2d72ead87b31d6869fba39eac6dc", 16)), "3DUjqJRcfdWhpsrLrGcQs"); + EXPECT_EQ(test_base58.encode(uinteger_t("8b4f5788d60030950d5dfbf94c585d", 16)), "4u44JSRH5jP5X39YhPsmE"); + EXPECT_EQ(test_base58.encode(uinteger_t("ee4c0a0025d1a74ace9fe349355cc5", 16)), "7fgACjABRQUGUEpN6VBBA"); + EXPECT_EQ(test_base58.encode(uinteger_t("58ac05b9a0b4b66083ff1d489b8d84", 16)), "3UtJPyTwGXapcxHx8Rom5"); + EXPECT_EQ(test_base58.encode(uinteger_t("1aa35c05e1132e8e049aafaef035d8", 16)), "kE2eSU7gM2619pT82iGP"); + EXPECT_EQ(test_base58.encode(uinteger_t("771b0c28608484562a292e5d5d2b30", 16)), "4LGYeWhyfrjUByibUqdVR"); + EXPECT_EQ(test_base58.encode(uinteger_t("78ff9a0e56f9e88dc1cd654b40d019", 16)), "4PLggs66qAdbmZgkaPihe"); + EXPECT_EQ(test_base58.encode(uinteger_t("6d691bdd736346aa5a0a95b373b2ab", 16)), "44Y6qTgSvRMkdqpQ5ufkN"); +} diff --git a/contrib/base-x/uinteger_t.hh b/contrib/base-x/uinteger_t.hh new file mode 100644 index 00000000000..901460f75c4 --- /dev/null +++ b/contrib/base-x/uinteger_t.hh @@ -0,0 +1,2546 @@ +/* +uinteger_t.hh +An arbitrary precision unsigned integer type for C++ + +Copyright (c) 2017 German Mendez Bravo (Kronuz) @ german dot mb at gmail.com +Copyright (c) 2013 - 2017 Jason Lee @ calccrypto at gmail.com + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be 
included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +With much help from Auston Sterling + +Thanks to Stefan Deigmüller for finding +a bug in operator*. + +Thanks to François Dessenne for convincing me +to do a general rewrite of this class. + +Germán Mández Bravo (Kronuz) converted Jason Lee's uint128_t +to header-only and extended to arbitrary bit length. +*/ + +#ifndef __uint_t__ +#define __uint_t__ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define ASSERT assert + +// Compatibility inlines +#ifndef __has_builtin // Optional of course +#define __has_builtin(x) 0 // Compatibility with non-clang compilers +#endif + +#if defined _MSC_VER +# define HAVE___ADDCARRY_U64 +# define HAVE___SUBBORROW_U64 +# define HAVE___ADDCARRY_U32 +# define HAVE___SUBBORROW_U32 +# define HAVE___ADDCARRY_U16 +# define HAVE___SUBBORROW_U16 +# define HAVE___UMUL128 +# define HAVE___UMUL64 +# define HAVE___UMUL32 +# include +#endif + +#if (defined(__clang__) && __has_builtin(__builtin_clzll)) || (defined(__GNUC__ ) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 3))) +# define HAVE____BUILTIN_CLZLL +#endif +#if (defined(__clang__) && __has_builtin(__builtin_clzl)) || (defined(__GNUC__ ) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 3))) +# define HAVE____BUILTIN_CLZL +#endif +#if (defined(__clang__) && __has_builtin(__builtin_clz)) || (defined(__GNUC__ ) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 3))) +# define 
HAVE____BUILTIN_CLZ +#endif +#if (defined(__clang__) && __has_builtin(__builtin_addcll)) +# define HAVE____BUILTIN_ADDCLL +#endif +#if (defined(__clang__) && __has_builtin(__builtin_addcl)) +# define HAVE____BUILTIN_ADDCL +#endif +#if (defined(__clang__) && __has_builtin(__builtin_addc)) +# define HAVE____BUILTIN_ADDC +#endif +#if (defined(__clang__) && __has_builtin(__builtin_subcll)) +# define HAVE____BUILTIN_SUBCLL +#endif +#if (defined(__clang__) && __has_builtin(__builtin_subcl)) +# define HAVE____BUILTIN_SUBCL +#endif +#if (defined(__clang__) && __has_builtin(__builtin_subc)) +# define HAVE____BUILTIN_SUBC +#endif + +#if defined __SIZEOF_INT128__ +#define HAVE____INT128_T +#endif + + +#ifndef DIGIT_T +#define DIGIT_T std::uint64_t +#endif + +#ifndef HALF_DIGIT_T +#define HALF_DIGIT_T std::uint32_t +#endif + +class uinteger_t; + +namespace std { // This is probably not a good idea + // Give uinteger_t type traits + template <> struct is_arithmetic : std::true_type {}; + template <> struct is_integral : std::true_type {}; + template <> struct is_unsigned : std::true_type {}; +} + +class uinteger_t { +public: + using digit = DIGIT_T; + using half_digit = HALF_DIGIT_T; + + static constexpr std::size_t digit_octets = sizeof(digit); // number of octets per digit + static constexpr std::size_t digit_bits = digit_octets * 8; // number of bits per digit + static constexpr std::size_t half_digit_octets = sizeof(half_digit); // number of octets per half_digit + static constexpr std::size_t half_digit_bits = half_digit_octets * 8; // number of bits per half_digit + + using container = std::vector; + + template + struct is_result { + static const bool value = false; + }; + + template + struct is_result> { + static const bool value = true; + }; + + template + struct is_result> { + static const bool value = true; + }; + +private: + static_assert(digit_octets == half_digit_octets * 2, "half_digit must be exactly half the size of digit"); + + static constexpr std::size_t 
karatsuba_cutoff = 1024 / digit_bits; + static constexpr double growth_factor = 1.5; + + std::size_t _begin; + std::size_t _end; + container _value_instance; + container& _value; + bool _carry; + +public: + // Window to vector (uses _begin and _end) + + void reserve(std::size_t sz) { + _value.reserve(sz + _begin); + } + + std::size_t grow(std::size_t n) { + // expands the vector using a growth factor + // and returns the new capacity. + auto cc = _value.capacity(); + if (n >= cc) { + cc = n * growth_factor; + _value.reserve(cc); + } + return cc; + } + + void resize(std::size_t sz) { + grow(sz + _begin); + _value.resize(sz + _begin); + } + + void resize(std::size_t sz, const digit& c) { + grow(sz + _begin); + _value.resize(sz + _begin, c); + } + + void clear() { + _value.clear(); + _begin = 0; + _end = 0; + _carry = false; + } + + digit* data() noexcept { + return _value.data() + _begin; + } + + const digit* data() const noexcept { + return _value.data() + _begin; + } + + std::size_t size() const noexcept { + return _end ? 
_end - _begin : _value.size() - _begin; + } + + void prepend(std::size_t sz, const digit& c) { + // Efficiently prepend by growing backwards by growth factor + auto min = std::min(_begin, sz); + if (min) { + // If there is some space before `_begin`, we try using it first: + _begin -= min; + std::fill_n(_value.begin() + _begin, min, c); + sz -= min; + } + if (sz) { + ASSERT(_begin == 0); // _begin should be 0 in here + // If there's still more room needed, we grow the vector: + // Ex.: grow using prepend(3, y) + // sz = 3 + // _begin = 0 (B) + // _end = 1 (E) + // initially (capacity == 12): + // |xxxxxxxxxx- | + // B E + // after reclaiming space after `_end` (same capacity == 12): + // |xxxxxxxxxx | + // B + // _end = 0 + // csz = 10 + // grow returns the new capacity (22) + // isz = 12 (22 - 10) + // _begin = 9 (12 - 3) + // after (capacity == (12 + 3) * 1.5 == 22): + // |---------yyyxxxxxxxxxx| + // B + if (_end) { + // reclaim space after `_end` + _value.resize(_end); + _end = 0; + } + auto csz = _value.size(); + auto isz = grow(csz + sz) - csz; + _value.insert(_value.begin(), isz, c); + _begin = isz - sz; + } + } + + void prepend(const digit& c) { + prepend(1, c); + } + + void prepend(const uinteger_t& num) { + prepend(num.size(), 0); + std::copy(num.begin(), num.end(), begin()); + } + + void append(std::size_t sz, const digit& c) { + // Efficiently append by growing by growth factor + if (_end) { + // reclaim space after `_end` + _value.resize(_end); + _end = 0; + } + auto nsz = _value.size() + sz; + grow(nsz); + _value.resize(nsz, c); + } + + void append(const digit& c) { + append(1, c); + } + + void append(const uinteger_t& num) { + auto sz = num.size(); + append(sz, 0); + std::copy(num.begin(), num.end(), end() - sz); + } + + container::iterator begin() noexcept { + return _value.begin() + _begin; + } + + container::const_iterator begin() const noexcept { + return _value.cbegin() + _begin; + } + + container::iterator end() noexcept { + return _end ? 
_value.begin() + _end : _value.end(); + } + + container::const_iterator end() const noexcept { + return _end ? _value.cbegin() + _end : _value.cend(); + } + + container::reverse_iterator rbegin() noexcept { + return _end ? container::reverse_iterator(_value.begin() + _end) : _value.rbegin(); + } + + container::const_reverse_iterator rbegin() const noexcept { + return _end ? container::const_reverse_iterator(_value.cbegin() + _end) : _value.crbegin(); + } + + container::reverse_iterator rend() noexcept { + return container::reverse_iterator(_value.begin() + _begin); + } + + container::const_reverse_iterator rend() const noexcept { + return container::const_reverse_iterator(_value.cbegin() + _begin); + } + + container::reference front() { + return *begin(); + } + + container::const_reference front() const { + return *begin(); + } + + container::reference back() { + return *rbegin(); + } + + container::const_reference back() const { + return *rbegin(); + } + +private: + // Optimized primitives for operations + + static digit _bits(digit x) { + #if defined HAVE____BUILTIN_CLZLL + if (digit_octets == sizeof(unsigned long long)) { + return x ? digit_bits - __builtin_clzll(x) : 1; + } + #endif + #if defined HAVE____BUILTIN_CLZL + if (digit_octets == sizeof(unsigned long)) { + return x ? digit_bits - __builtin_clzl(x) : 1; + } + #endif + #if defined HAVE____BUILTIN_CLZ + if (digit_octets == sizeof(unsigned)) { + return x ? digit_bits - __builtin_clz(x) : 1; + } + #endif + { + digit c = x ? 
0 : 1; + while (x) { + x >>= 1; + ++c; + } + return c; + } + } + + static digit _mult(digit x, digit y, digit* lo) { + #if defined HAVE___UMUL128 + if (digit_bits == 64) { + digit h; + digit l = _umul128(x, y, &h); // _umul128(x, y, *hi) -> lo + return h; + } + #endif + #if defined HAVE___UMUL64 + if (digit_bits == 32) { + digit h; + digit l = _umul64(x, y, &h); // _umul64(x, y, *hi) -> lo + return h; + } + #endif + #if defined HAVE___UMUL32 + if (digit_bits == 16) { + digit h; + digit l = _umul32(x, y, &h); // _umul32(x, y, *hi) -> lo + return h; + } + #endif + #if defined HAVE____INT128_T + if (digit_bits == 64) { + auto r = static_cast<__uint128_t>(x) * static_cast<__uint128_t>(y); + *lo = r; + return r >> digit_bits; + } + #endif + if (digit_bits == 64) { + digit x0 = x & 0xffffffffUL; + digit x1 = x >> 32; + digit y0 = y & 0xffffffffUL; + digit y1 = y >> 32; + + digit u = (x0 * y0); + digit v = (x1 * y0) + (u >> 32); + digit w = (x0 * y1) + (v & 0xffffffffUL); + + *lo = (w << 32) + (u & 0xffffffffUL); // low + return (x1 * y1) + (v >> 32) + (w >> 32); // high + } if (digit_bits == 32) { + auto r = static_cast(x) * static_cast(y); + *lo = r; + return r >> 32; + } if (digit_bits == 16) { + auto r = static_cast(x) * static_cast(y); + *lo = r; + return r >> 16; + } if (digit_bits == 8) { + auto r = static_cast(x) * static_cast(y); + *lo = r; + return r >> 8; + } + } + + static digit _multadd(digit x, digit y, digit a, digit c, digit* lo) { + #if defined HAVE___UMUL128 && defined HAVE___ADDCARRY_U64 + if (digit_bits == 64) { + digit h; + digit l = _umul128(x, y, &h); // _umul128(x, y, *hi) -> lo + return h + _addcarry_u64(c, l, a, lo); // _addcarry_u64(carryin, x, y, *sum) -> carryout + } + #endif + #if defined HAVE___UMUL64 && defined HAVE___ADDCARRY_U32 + if (digit_bits == 32) { + digit h; + digit l = _umul64(x, y, &h); // _umul64(x, y, *hi) -> lo + return h + _addcarry_u32(c, l, a, lo); // _addcarry_u32(carryin, x, y, *sum) -> carryout + } + #endif + #if defined 
HAVE___UMUL32 && defined HAVE___ADDCARRY_U16 + if (digit_bits == 16) { + digit h; + digit l = _umul32(x, y, &h); // _umul32(x, y, *hi) -> lo + return h + _addcarry_u16(c, l, a, lo); // _addcarry_u16(carryin, x, y, *sum) -> carryout + } + #endif + #if defined HAVE____INT128_T + if (digit_bits == 64) { + auto r = static_cast<__uint128_t>(x) * static_cast<__uint128_t>(y) + static_cast<__uint128_t>(a) + static_cast<__uint128_t>(c); + *lo = r; + return r >> digit_bits; + } + #endif + if (digit_bits == 64) { + digit x0 = x & 0xffffffffUL; + digit x1 = x >> 32; + digit y0 = y & 0xffffffffUL; + digit y1 = y >> 32; + + digit u = (x0 * y0) + (a & 0xffffffffUL) + (c & 0xffffffffUL); + digit v = (x1 * y0) + (u >> 32) + (a >> 32) + (c >> 32); + digit w = (x0 * y1) + (v & 0xffffffffUL); + + *lo = (w << 32) + (u & 0xffffffffUL); // low + return (x1 * y1) + (v >> 32) + (w >> 32); // high + } + if (digit_bits == 32) { + auto r = static_cast(x) * static_cast(y) + static_cast(a) + static_cast(c); + *lo = r; + return r >> 32; + } + if (digit_bits == 16) { + auto r = static_cast(x) * static_cast(y) + static_cast(a) + static_cast(c); + *lo = r; + return r >> 16; + } + if (digit_bits == 8) { + auto r = static_cast(x) * static_cast(y) + static_cast(a) + static_cast(c); + *lo = r; + return r >> 8; + } + } + + static digit _divmod(digit x_hi, digit x_lo, digit y, digit* result) { + #if defined HAVE____INT128_T + if (digit_bits == 64) { + auto x = static_cast<__uint128_t>(x_hi) << digit_bits | static_cast<__uint128_t>(x_lo); + digit q = x / y; + digit r = x % y; + + *result = q; + return r; + } + #endif + if (digit_bits == 64) { + // quotient + digit q = x_lo << 1; + + // remainder + digit r = x_hi; + + digit carry = x_lo >> 63; + int i; + + for (i = 0; i < 64; i++) { + auto tmp = r >> 63; + r <<= 1; + r |= carry; + carry = tmp; + + if (carry == 0) { + if (r >= y) { + carry = 1; + } else { + tmp = q >> 63; + q <<= 1; + q |= carry; + carry = tmp; + continue; + } + } + + r -= y; + r -= (1 - 
carry); + carry = 1; + tmp = q >> 63; + q <<= 1; + q |= carry; + carry = tmp; + } + + *result = q; + return r; + } + if (digit_bits == 32) { + auto x = static_cast(x_hi) << 32 | static_cast(x_lo); + digit q = x / y; + digit r = x % y; + + *result = q; + return r; + } + if (digit_bits == 16) { + auto x = static_cast(x_hi) << 16 | static_cast(x_lo); + digit q = x / y; + digit r = x % y; + + *result = q; + return r; + } + if (digit_bits == 8) { + auto x = static_cast(x_hi) << 8 | static_cast(x_lo); + digit q = x / y; + digit r = x % y; + + *result = q; + return r; + } + } + + static digit _addcarry(digit x, digit y, digit c, digit* result) { + #if defined HAVE___ADDCARRY_U64 + if (digit_bits == 64) { + return _addcarry_u64(c, x, y, result); // _addcarry_u64(carryin, x, y, *sum) -> carryout + } + #endif + #if defined HAVE___ADDCARRY_U32 + if (digit_bits == 32) { + return _addcarry_u32(c, x, y, result); // _addcarry_u32(carryin, x, y, *sum) -> carryout + } + #endif + #if defined HAVE___ADDCARRY_U16 + if (digit_bits == 16) { + return _addcarry_u16(c, x, y, result); // _addcarry_u16(carryin, x, y, *sum) -> carryout + } + #endif + #if defined HAVE____BUILTIN_ADDCLL + if (digit_octets == sizeof(unsigned long long)) { + unsigned long long carryout; + *result = __builtin_addcll(x, y, c, &carryout); // __builtin_addcll(x, y, carryin, *carryout) -> sum + return carryout; + } + #endif + #if defined HAVE____BUILTIN_ADDCL + if (digit_octets == sizeof(unsigned long)) { + unsigned long carryout; + *result = __builtin_addcl(x, y, c, &carryout); // __builtin_addcl(x, y, carryin, *carryout) -> sum + return carryout; + } + #endif + #if defined HAVE____BUILTIN_ADDC + if (digit_octets == sizeof(unsigned)) { + unsigned carryout; + *result = __builtin_addc(x, y, c, &carryout); // __builtin_addc(x, y, carryin, *carryout) -> sum + return carryout; + } + #endif + #if defined HAVE____INT128_T + if (digit_bits == 64) { + auto r = static_cast<__uint128_t>(x) + static_cast<__uint128_t>(y) + 
static_cast<__uint128_t>(c); + *result = r; + return static_cast(r >> digit_bits); + } + #endif + if (digit_bits == 64) { + digit x0 = x & 0xffffffffUL; + digit x1 = x >> 32; + digit y0 = y & 0xffffffffUL; + digit y1 = y >> 32; + + auto u = x0 + y0 + c; + auto v = x1 + y1 + static_cast(u >> 32); + *result = (v << 32) + (u & 0xffffffffUL); + return static_cast(v >> 32); + } + if (digit_bits == 32) { + auto r = static_cast(x) + static_cast(y) + static_cast(c); + *result = r; + return static_cast(r >> 32); + } + if (digit_bits == 16) { + auto r = static_cast(x) + static_cast(y) + static_cast(c); + *result = r; + return static_cast(r >> 16); + } + if (digit_bits == 8) { + auto r = static_cast(x) + static_cast(y) + static_cast(c); + *result = r; + return static_cast(r >> 8); + } + } + + static digit _subborrow(digit x, digit y, digit c, digit* result) { + #if defined HAVE___SUBBORROW_U64 + if (digit_bits == 64) { + return _subborrow_u64(c, x, y, result); // _subborrow_u64(carryin, x, y, *sum) -> carryout + } + #endif + #if defined HAVE___SUBBORROW_U32 + if (digit_bits == 64) { + return _subborrow_u32(c, x, y, result); // _subborrow_u32(carryin, x, y, *sum) -> carryout + } + #endif + #if defined HAVE___SUBBORROW_U16 + if (digit_bits == 64) { + return _subborrow_u16(c, x, y, result); // _subborrow_u16(carryin, x, y, *sum) -> carryout + } + #endif + #if defined HAVE____BUILTIN_SUBCLL + if (digit_octets == sizeof(unsigned long long)) { + unsigned long long carryout; + *result = __builtin_subcll(x, y, c, &carryout); // __builtin_subcll(x, y, carryin, *carryout) -> sum + return carryout; + } + #endif + #if defined HAVE____BUILTIN_SUBCL + if (digit_octets == sizeof(unsigned long)) { + unsigned long carryout; + *result = __builtin_subcl(x, y, c, &carryout); // __builtin_subcl(x, y, carryin, *carryout) -> sum + return carryout; + } + #endif + #if defined HAVE____BUILTIN_SUBC + if (digit_octets == sizeof(unsigned)) { + unsigned carryout; + *result = __builtin_subc(x, y, c, 
&carryout); // __builtin_subc(x, y, carryin, *carryout) -> sum + return carryout; + } + #endif + #if defined HAVE____INT128_T + if (digit_bits == 64) { + auto r = static_cast<__uint128_t>(x) - static_cast<__uint128_t>(y) - static_cast<__uint128_t>(c); + *result = r; + return static_cast(r >> 64); + } + #endif + if (digit_bits == 64) { + digit x0 = x & 0xffffffffUL; + digit x1 = x >> 32; + digit y0 = y & 0xffffffffUL; + digit y1 = y >> 32; + + auto u = x0 - y0 - c; + auto v = x1 - y1 - static_cast(u >> 32); + *result = (v << 32) + (u & 0xffffffffUL); + return static_cast(v >> 32); + } + if (digit_bits == 32) { + auto r = static_cast(x) - static_cast(y) - static_cast(c); + *result = r; + return static_cast(r >> 32); + } + if (digit_bits == 16) { + auto r = static_cast(x) - static_cast(y) - static_cast(c); + *result = r; + return static_cast(r >> 16); + } + if (digit_bits == 8) { + auto r = static_cast(x) - static_cast(y) - static_cast(c); + *result = r; + return static_cast(r >> 8); + } + } + + // Helper functions + + void trim(digit mask = 0) { + auto rit = rbegin(); + auto rit_e = rend(); + + // Masks the last value of internal vector + mask &= (digit_bits - 1); + if (mask && rit != rit_e) { + *rit &= (static_cast(1) << mask) - 1; + } + + // Removes all unused zeros from the internal vector + auto rit_f = std::find_if(rit, rit_e, [](const digit& c) { return c; }); + resize(rit_e - rit_f); // shrink + } + + static constexpr char chr(int ord) { + constexpr const char _[256] = { + '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', + 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', + 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', + }; + return _[ord]; + } + + static constexpr int ord(int chr) { + constexpr const int _[256] = { + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + 0, 1, 2, 3, 
4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, + + -1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, + 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, -1, -1, -1, -1, -1, + -1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, + 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, -1, -1, -1, -1, -1, + + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + }; + return _[chr]; + } + +public: + static constexpr unsigned base_bits(int base) { + constexpr const unsigned _[256] = { + 0, 1, 0, 2, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 4, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, + }; + return _[base - 1]; + } + + static constexpr unsigned base_size(int base) { + constexpr const unsigned _[256] = { + 0, 64, 41, 32, 28, 25, 23, 22, 21, 20, 19, 18, 18, 17, 17, 16, + 16, 16, 16, 15, 15, 15, 15, 14, 14, 14, 
14, 14, 14, 14, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 11, 11, 11, 11, 11, 11, 11, 11, + + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 8, + }; + return _[base - 1]; + } + + static const uinteger_t uint_0() { + static uinteger_t uint_0(0); + return uint_0; + } + + static const uinteger_t uint_1() { + static uinteger_t uint_1(1); + return uint_1; + } + +private: + // Public Implementation +#ifdef UINT_T_PUBLIC_IMPLEMENTATION +public: +#endif + static uinteger_t& bitwise_and(uinteger_t& lhs, const uinteger_t& rhs) { + auto lhs_sz = lhs.size(); + auto rhs_sz = rhs.size(); + + if (lhs_sz > rhs_sz) { + lhs.resize(rhs_sz); // shrink + } + + auto lhs_it = lhs.begin(); + auto lhs_it_e = lhs.end(); + + auto rhs_it = rhs.begin(); + + for (; lhs_it != lhs_it_e; ++lhs_it, ++rhs_it) { + *lhs_it &= *rhs_it; + } + + // Finish up + lhs.trim(); + return lhs; + } + + static uinteger_t& bitwise_and(uinteger_t& result, const uinteger_t& lhs, const uinteger_t& rhs) { + auto lhs_sz = lhs.size(); + auto rhs_sz = rhs.size(); + + auto result_sz = std::max(lhs_sz, rhs_sz); + result.resize(result_sz); + + // not using `end()` because resize of `result.resize()` could have + // resized `lhs` or `rhs` if `result` is also either `rhs` or `lhs`. 
+ auto lhs_it = lhs.begin(); + auto lhs_it_e = lhs_it + lhs_sz; + + auto rhs_it = rhs.begin(); + auto rhs_it_e = rhs_it + rhs_sz; + + auto it = result.begin(); + + if (lhs_sz < rhs_sz) { + for (; lhs_it != lhs_it_e; ++lhs_it, ++rhs_it, ++it) { + *it = *lhs_it & *rhs_it; + } + for (; rhs_it != rhs_it_e; ++rhs_it, ++it) { + *it = 0; + } + } else { + for (; rhs_it != rhs_it_e; ++lhs_it, ++rhs_it, ++it) { + *it = *lhs_it & *rhs_it; + } + for (; lhs_it != lhs_it_e; ++lhs_it, ++it) { + *it = 0; + } + } + + // Finish up + result.trim(); + return result; + } + + static uinteger_t bitwise_and(const uinteger_t& lhs, const uinteger_t& rhs) { + uinteger_t result; + bitwise_and(result, lhs, rhs); + return result; + } + + static uinteger_t& bitwise_or(uinteger_t& lhs, const uinteger_t& rhs) { + auto lhs_sz = lhs.size(); + auto rhs_sz = rhs.size(); + + if (lhs_sz < rhs_sz) { + lhs.resize(rhs_sz, 0); // grow + } + + auto lhs_it = lhs.begin(); + + auto rhs_it = rhs.begin(); + auto rhs_it_e = rhs.end(); + + for (; rhs_it != rhs_it_e; ++lhs_it, ++rhs_it) { + *lhs_it |= *rhs_it; + } + + // Finish up + lhs.trim(); + return lhs; + } + + static uinteger_t& bitwise_or(uinteger_t& result, const uinteger_t& lhs, const uinteger_t& rhs) { + auto lhs_sz = lhs.size(); + auto rhs_sz = rhs.size(); + + auto result_sz = std::max(lhs_sz, rhs_sz); + result.resize(result_sz); + + // not using `end()` because resize of `result.resize()` could have + // resized `lhs` or `rhs` if `result` is also either `rhs` or `lhs`. 
+ auto lhs_it = lhs.begin(); + auto lhs_it_e = lhs_it + lhs_sz; + + auto rhs_it = rhs.begin(); + auto rhs_it_e = rhs_it + rhs_sz; + + auto it = result.begin(); + + if (lhs_sz < rhs_sz) { + for (; lhs_it != lhs_it_e; ++lhs_it, ++rhs_it, ++it) { + *it = *lhs_it | *rhs_it; + } + for (; rhs_it != rhs_it_e; ++rhs_it, ++it) { + *it = *rhs_it; + } + } else { + for (; rhs_it != rhs_it_e; ++lhs_it, ++rhs_it, ++it) { + *it = *lhs_it | *rhs_it; + } + for (; lhs_it != lhs_it_e; ++lhs_it, ++it) { + *it = *lhs_it; + } + } + + // Finish up + result.trim(); + return result; + } + static uinteger_t bitwise_or(const uinteger_t& lhs, const uinteger_t& rhs) { + uinteger_t result; + bitwise_or(result, lhs, rhs); + return result; + } + + static uinteger_t& bitwise_xor(uinteger_t& lhs, const uinteger_t& rhs) { + auto lhs_sz = lhs.size(); + auto rhs_sz = rhs.size(); + + if (lhs_sz < rhs_sz) { + lhs.resize(rhs_sz, 0); // grow + } + + auto lhs_it = lhs.begin(); + + auto rhs_it = rhs.begin(); + auto rhs_it_e = rhs.end(); + + for (; rhs_it != rhs_it_e; ++lhs_it, ++rhs_it) { + *lhs_it ^= *rhs_it; + } + + // Finish up + lhs.trim(); + return lhs; + } + + static uinteger_t& bitwise_xor(uinteger_t& result, const uinteger_t& lhs, const uinteger_t& rhs) { + auto lhs_sz = lhs.size(); + auto rhs_sz = rhs.size(); + + auto result_sz = std::max(lhs_sz, rhs_sz); + result.resize(result_sz); + + // not using `end()` because resize of `result.resize()` could have + // resized `lhs` or `rhs` if `result` is also either `rhs` or `lhs`. 
+ auto lhs_it = lhs.begin(); + auto lhs_it_e = lhs_it + lhs_sz; + + auto rhs_it = rhs.begin(); + auto rhs_it_e = rhs_it + rhs_sz; + + auto it = result.begin(); + + if (lhs_sz < rhs_sz) { + for (; lhs_it != lhs_it_e; ++lhs_it, ++rhs_it, ++it) { + *it = *lhs_it ^ *rhs_it; + } + for (; rhs_it != rhs_it_e; ++rhs_it, ++it) { + *it = *rhs_it; + } + } else { + for (; rhs_it != rhs_it_e; ++lhs_it, ++rhs_it, ++it) { + *it = *lhs_it ^ *rhs_it; + } + for (; lhs_it != lhs_it_e; ++lhs_it, ++it) { + *it = *lhs_it; + } + } + + // Finish up + result.trim(); + return result; + } + + static uinteger_t bitwise_xor(const uinteger_t& lhs, const uinteger_t& rhs) { + uinteger_t result; + bitwise_xor(result, lhs, rhs); + return result; + } + + static uinteger_t& bitwise_inv(uinteger_t& lhs) { + auto lhs_sz = lhs.size(); + + auto b = lhs.bits(); + + if (!lhs_sz) { + lhs.append(0); + } + + // not using `end()` because resize of `result.resize()` could have + // resized `lhs` if `result` is also `lhs`. + auto lhs_it = lhs.begin(); + auto lhs_it_e = lhs_it + lhs_sz; + + for (; lhs_it != lhs_it_e; ++lhs_it) { + *lhs_it = ~*lhs_it; + } + + // Finish up + lhs.trim(b ? b : 1); + return lhs; + } + + static uinteger_t& bitwise_inv(uinteger_t& result, const uinteger_t& lhs) { + auto lhs_sz = lhs.size(); + + auto b = lhs.bits(); + + auto result_sz = lhs_sz ? lhs_sz : 1; + result.resize(result_sz); + + // not using `end()` because resize of `result.resize()` could have + // resized `lhs` if `result` is also `lhs`. + auto lhs_it = lhs.begin(); + auto lhs_it_e = lhs_it + lhs_sz; + + auto it = result.begin(); + auto it_e = it + result_sz; + + for (; lhs_it != lhs_it_e; ++lhs_it, ++it) { + *it = ~*lhs_it; + } + for (; it != it_e; ++it) { + *it = ~static_cast(0); + } + + // Finish up + result.trim(b ? 
b : 1); + return result; + } + + static uinteger_t bitwise_inv(const uinteger_t& lhs) { + uinteger_t result; + bitwise_inv(result, lhs); + return result; + } + + static uinteger_t& bitwise_lshift(uinteger_t& lhs, const uinteger_t& rhs) { + if (!rhs) { + return lhs; + } + + uinteger_t shifts_q; + uinteger_t shifts_r; + auto _digit_bits = digit_bits; + auto uint_digit_bits = uinteger_t(_digit_bits); + divmod(shifts_q, shifts_r, rhs, uint_digit_bits); + std::size_t shifts = static_cast(shifts_q); + std::size_t shift = static_cast(shifts_r); + + if (shifts) { + lhs.prepend(shifts, 0); + } + if (shift) { + digit shifted = 0; + auto lhs_it = lhs.begin() + shifts; + auto lhs_it_e = lhs.end(); + for (; lhs_it != lhs_it_e; ++lhs_it) { + auto v = (*lhs_it << shift) | shifted; + shifted = *lhs_it >> (_digit_bits - shift); + *lhs_it = v; + } + if (shifted) { + lhs.append(shifted); + } + } + + // Finish up + lhs.trim(); + return lhs; + } + + static uinteger_t& bitwise_lshift(uinteger_t& result, const uinteger_t& lhs, const uinteger_t& rhs) { + if (&result._value == &lhs._value) { + bitwise_lshift(result, rhs); + return result; + } + if (!rhs) { + result = lhs; + return result; + } + + auto lhs_sz = lhs.size(); + + uinteger_t shifts_q; + uinteger_t shifts_r; + auto _digit_bits = digit_bits; + auto uint_digit_bits = uinteger_t(_digit_bits); + divmod(shifts_q, shifts_r, rhs, uint_digit_bits); + std::size_t shifts = static_cast(shifts_q); + std::size_t shift = static_cast(shifts_r); + + auto result_sz = lhs_sz + shifts; + result.grow(result_sz + 1); + result.resize(shifts, 0); + result.resize(result_sz); + + // not using `end()` because resize of `result.resize()` could have + // resized `lhs` if `result` is also `lhs`. 
+ auto lhs_it = lhs.begin(); + auto lhs_it_e = lhs_it + lhs_sz; + + auto it = result.begin() + shifts; + + if (shift) { + digit shifted = 0; + for (; lhs_it != lhs_it_e; ++lhs_it, ++it) { + auto v = (*lhs_it << shift) | shifted; + shifted = *lhs_it >> (_digit_bits - shift); + *it = v; + } + if (shifted) { + result.append(shifted); + } + } else { + for (; lhs_it != lhs_it_e; ++lhs_it, ++it) { + *it = *lhs_it; + } + } + + // Finish up + result.trim(); + return result; + } + + static uinteger_t bitwise_lshift(const uinteger_t& lhs, const uinteger_t& rhs) { + uinteger_t result; + bitwise_lshift(result, lhs, rhs); + return result; + } + + static uinteger_t& bitwise_rshift(uinteger_t& lhs, const uinteger_t& rhs) { + if (!rhs) { + return lhs; + } + + auto lhs_sz = lhs.size(); + + auto _digit_bits = digit_bits; + if (compare(rhs, uinteger_t(lhs_sz * _digit_bits)) >= 0) { + lhs = uint_0(); + return lhs; + } + + uinteger_t shifts_q; + uinteger_t shifts_r; + auto uint_digit_bits = uinteger_t(_digit_bits); + divmod(shifts_q, shifts_r, rhs, uint_digit_bits); + std::size_t shifts = static_cast(shifts_q); + std::size_t shift = static_cast(shifts_r); + + if (shifts) { + lhs._begin += shifts; + } + if (shift) { + digit shifted = 0; + auto lhs_rit = lhs.rbegin(); + auto lhs_rit_e = lhs.rend(); + for (; lhs_rit != lhs_rit_e; ++lhs_rit) { + auto v = (*lhs_rit >> shift) | shifted; + shifted = *lhs_rit << (_digit_bits - shift); + *lhs_rit = v; + } + lhs.trim(); + } + + return lhs; + } + + static uinteger_t& bitwise_rshift(uinteger_t& result, const uinteger_t& lhs, const uinteger_t& rhs) { + if (&result._value == &lhs._value) { + bitwise_lshift(result, rhs); + return result; + } + if (!rhs) { + result = lhs; + return result; + } + + auto lhs_sz = lhs.size(); + + auto _digit_bits = digit_bits; + if (compare(rhs, uinteger_t(lhs_sz * _digit_bits)) >= 0) { + result = uint_0(); + return result; + } + + uinteger_t shifts_q; + uinteger_t shifts_r; + auto uint_digit_bits = 
uinteger_t(_digit_bits); + divmod(shifts_q, shifts_r, rhs, uint_digit_bits); + std::size_t shifts = static_cast(shifts_q); + std::size_t shift = static_cast(shifts_r); + + auto result_sz = lhs_sz - shifts; + result.resize(result_sz); + + // not using `end()` because resize of `result.resize()` could have + // resized `lhs` if `result` is also `lhs`. + auto lhs_rit = lhs.rbegin(); + auto lhs_rit_e = lhs_rit + lhs_sz - shifts; + + auto rit = result.rbegin(); + auto rit_e = rit + result_sz; + + if (shift) { + digit shifted = 0; + for (; lhs_rit != lhs_rit_e; ++lhs_rit, ++rit) { + ASSERT(rit != rit_e); (void)(rit_e); + auto v = (*lhs_rit >> shift) | shifted; + shifted = *lhs_rit << (_digit_bits - shift); + *rit = v; + } + } else { + for (; lhs_rit != lhs_rit_e; ++lhs_rit, ++rit) { + ASSERT(rit != rit_e); (void)(rit_e); + *rit = *lhs_rit; + } + } + + // Finish up + result.trim(); + return result; + } + + static uinteger_t bitwise_rshift(const uinteger_t& lhs, const uinteger_t& rhs) { + uinteger_t result; + bitwise_rshift(result, lhs, rhs); + return result; + } + + static int compare(const uinteger_t& lhs, const uinteger_t& rhs) { + auto lhs_sz = lhs.size(); + auto rhs_sz = rhs.size(); + + if (lhs_sz > rhs_sz) return 1; + if (lhs_sz < rhs_sz) return -1; + + auto lhs_rit = lhs.rbegin(); + auto lhs_rit_e = lhs.rend(); + + auto rhs_rit = rhs.rbegin(); + + for (; lhs_rit != lhs_rit_e && *lhs_rit == *rhs_rit; ++lhs_rit, ++rhs_rit); + + if (lhs_rit != lhs_rit_e) { + if (*lhs_rit > *rhs_rit) return 1; + if (*lhs_rit < *rhs_rit) return -1; + } + + return 0; + } + + static uinteger_t& long_add(uinteger_t& lhs, const uinteger_t& rhs) { + auto lhs_sz = lhs.size(); + auto rhs_sz = rhs.size(); + + if (lhs_sz < rhs_sz) { + lhs.reserve(rhs_sz + 1); + lhs.resize(rhs_sz, 0); // grow + } + + // not using `end()` because resize of `lhs.resize()` could have + // resized `lhs`. 
+ auto lhs_it = lhs.begin(); + auto lhs_it_e = lhs_it + lhs_sz; + + auto rhs_it = rhs.begin(); + auto rhs_it_e = rhs_it + rhs_sz; + + digit carry = 0; + if (lhs_sz < rhs_sz) { + for (; lhs_it != lhs_it_e; ++rhs_it, ++lhs_it) { + carry = _addcarry(*lhs_it, *rhs_it, carry, &*lhs_it); + } + for (; carry && rhs_it != rhs_it_e; ++rhs_it, ++lhs_it) { + carry = _addcarry(0, *rhs_it, carry, &*lhs_it); + } + for (; rhs_it != rhs_it_e; ++rhs_it, ++lhs_it) { + *lhs_it = *rhs_it; + } + } else { + for (; rhs_it != rhs_it_e; ++rhs_it, ++lhs_it) { + carry = _addcarry(*lhs_it, *rhs_it, carry, &*lhs_it); + } + for (; carry && lhs_it != lhs_it_e; ++lhs_it) { + carry = _addcarry(*lhs_it, 0, carry, &*lhs_it); + } + } + + if (carry) { + lhs.append(1); + } + + lhs._carry = false; + + // Finish up + lhs.trim(); + return lhs; + } + + static uinteger_t& long_add(uinteger_t& result, const uinteger_t& lhs, const uinteger_t& rhs) { + auto lhs_sz = lhs.size(); + auto rhs_sz = rhs.size(); + + auto result_sz = std::max(lhs_sz, rhs_sz); + result.reserve(result_sz + 1); + result.resize(result_sz, 0); + + // not using `end()` because resize of `result.resize()` could have + // resized `lhs` or `rhs` if `result` is also either `rhs` or `lhs`. 
+ auto lhs_it = lhs.begin(); + auto lhs_it_e = lhs_it + lhs_sz; + + auto rhs_it = rhs.begin(); + auto rhs_it_e = rhs_it + rhs_sz; + + auto it = result.begin(); + + digit carry = 0; + if (lhs_sz < rhs_sz) { + for (; lhs_it != lhs_it_e; ++lhs_it, ++rhs_it, ++it) { + carry = _addcarry(*lhs_it, *rhs_it, carry, &*it); + } + for (; carry && rhs_it != rhs_it_e; ++rhs_it, ++it) { + carry = _addcarry(0, *rhs_it, carry, &*it); + } + for (; rhs_it != rhs_it_e; ++rhs_it, ++it) { + *it = *rhs_it; + } + } else { + for (; rhs_it != rhs_it_e; ++lhs_it, ++rhs_it, ++it) { + carry = _addcarry(*lhs_it, *rhs_it, carry, &*it); + } + for (; carry && lhs_it != lhs_it_e; ++lhs_it, ++it) { + carry = _addcarry(*lhs_it, 0, carry, &*it); + } + for (; lhs_it != lhs_it_e; ++lhs_it, ++it) { + *it = *lhs_it; + } + } + + if (carry) { + result.append(1); + } + result._carry = false; + + // Finish up + result.trim(); + return result; + } + + static uinteger_t& add(uinteger_t& lhs, const uinteger_t& rhs) { + // First try saving some calculations: + if (!rhs) { + return lhs; + } + if (!lhs) { + lhs = rhs; + return lhs; + } + + return long_add(lhs, rhs); + } + + static uinteger_t& add(uinteger_t& result, const uinteger_t& lhs, const uinteger_t& rhs) { + // First try saving some calculations: + if (!rhs) { + result = lhs; + return result; + } + if (!lhs) { + result = rhs; + return result; + } + + return long_add(result, lhs, rhs); + } + + static uinteger_t add(const uinteger_t& lhs, const uinteger_t& rhs) { + uinteger_t result; + add(result, lhs, rhs); + return result; + } + + static uinteger_t& long_sub(uinteger_t& lhs, const uinteger_t& rhs) { + auto lhs_sz = lhs.size(); + auto rhs_sz = rhs.size(); + + if (lhs_sz < rhs_sz) { + lhs.resize(rhs_sz, 0); // grow + } + + // not using `end()` because resize of `lhs.resize()` could have + // resized `lhs`. 
+ auto lhs_it = lhs.begin(); + auto lhs_it_e = lhs_it + lhs_sz; + + auto rhs_it = rhs.begin(); + auto rhs_it_e = rhs_it + rhs_sz; + + digit borrow = 0; + if (lhs_sz < rhs_sz) { + for (; lhs_it != lhs_it_e; ++lhs_it, ++rhs_it) { + borrow = _subborrow(*lhs_it, *rhs_it, borrow, &*lhs_it); + } + for (; rhs_it != rhs_it_e; ++lhs_it, ++rhs_it) { + borrow = _subborrow(0, *rhs_it, borrow, &*lhs_it); + } + } else { + for (; rhs_it != rhs_it_e; ++lhs_it, ++rhs_it) { + borrow = _subborrow(*lhs_it, *rhs_it, borrow, &*lhs_it); + } + for (; borrow && lhs_it != lhs_it_e; ++lhs_it) { + borrow = _subborrow(*lhs_it, 0, borrow, &*lhs_it); + } + } + + lhs._carry = borrow; + + // Finish up + lhs.trim(); + return lhs; + } + + static uinteger_t& long_sub(uinteger_t& result, const uinteger_t& lhs, const uinteger_t& rhs) { + auto lhs_sz = lhs.size(); + auto rhs_sz = rhs.size(); + + auto result_sz = std::max(lhs_sz, rhs_sz); + result.resize(result_sz, 0); + + // not using `end()` because resize of `result.resize()` could have + // resized `lhs` or `rhs` if `result` is also either `rhs` or `lhs`. 
+ auto lhs_it = lhs.begin(); + auto lhs_it_e = lhs_it + lhs_sz; + + auto rhs_it = rhs.begin(); + auto rhs_it_e = rhs_it + rhs_sz; + + auto it = result.begin(); + + digit borrow = 0; + if (lhs_sz < rhs_sz) { + for (; lhs_it != lhs_it_e; ++lhs_it, ++rhs_it, ++it) { + borrow = _subborrow(*lhs_it, *rhs_it, borrow, &*it); + } + for (; rhs_it != rhs_it_e; ++rhs_it, ++it) { + borrow = _subborrow(0, *rhs_it, borrow, &*it); + } + } else { + for (; rhs_it != rhs_it_e; ++lhs_it, ++rhs_it, ++it) { + borrow = _subborrow(*lhs_it, *rhs_it, borrow, &*it); + } + for (; borrow && lhs_it != lhs_it_e; ++lhs_it, ++it) { + borrow = _subborrow(*lhs_it, 0, borrow, &*it); + } + for (; lhs_it != lhs_it_e; ++lhs_it, ++it) { + *it = *lhs_it; + } + } + + result._carry = borrow; + + // Finish up + result.trim(); + return result; + } + + static uinteger_t& sub(uinteger_t& lhs, const uinteger_t& rhs) { + // First try saving some calculations: + if (!rhs) { + return lhs; + } + + return long_sub(lhs, rhs); + } + + static uinteger_t& sub(uinteger_t& result, const uinteger_t& lhs, const uinteger_t& rhs) { + // First try saving some calculations: + if (!rhs) { + result = lhs; + return result; + } + + return long_sub(result, lhs, rhs); + } + + static uinteger_t sub(const uinteger_t& lhs, const uinteger_t& rhs) { + uinteger_t result; + sub(result, lhs, rhs); + return result; + } + + // Single word long multiplication + // Fastests, but ONLY for single sized rhs + static uinteger_t& single_mult(uinteger_t& result, const uinteger_t& lhs, const uinteger_t& rhs) { + auto lhs_sz = lhs.size(); + auto rhs_sz = rhs.size(); + + ASSERT(rhs_sz == 1); (void)(rhs_sz); + auto n = rhs.front(); + + uinteger_t tmp; + tmp.resize(lhs_sz + 1, 0); + + auto it_lhs = lhs.begin(); + auto it_lhs_e = lhs.end(); + + auto it_result = tmp.begin(); + + digit carry = 0; + for (; it_lhs != it_lhs_e; ++it_lhs, ++it_result) { + carry = _multadd(*it_lhs, n, 0, carry, &*it_result); + } + if (carry) { + *it_result = carry; + } + + result = 
std::move(tmp); + + // Finish up + result.trim(); + return result; + } + + static uinteger_t& long_mult(uinteger_t& result, const uinteger_t& lhs, const uinteger_t& rhs) { + auto lhs_sz = lhs.size(); + auto rhs_sz = rhs.size(); + + if (lhs_sz > rhs_sz) { + // rhs should be the largest: + return long_mult(result, rhs, lhs); + } + + if (lhs_sz == 1) { + return single_mult(result, rhs, lhs); + } + + uinteger_t tmp; + tmp.resize(lhs_sz + rhs_sz, 0); + + auto it_lhs = lhs.begin(); + auto it_lhs_e = lhs.end(); + + auto it_rhs = rhs.begin(); + auto it_rhs_e = rhs.end(); + + auto it_result = tmp.begin(); + auto it_result_s = it_result; + auto it_result_l = it_result; + + for (; it_lhs != it_lhs_e; ++it_lhs, ++it_result) { + if (auto lhs_it_val = *it_lhs) { + auto _it_rhs = it_rhs; + auto _it_result = it_result; + digit carry = 0; + for (; _it_rhs != it_rhs_e; ++_it_rhs, ++_it_result) { + carry = _multadd(*_it_rhs, lhs_it_val, *_it_result, carry, &*_it_result); + } + if (carry) { + *_it_result++ = carry; + } + if (it_result_l < _it_result) { + it_result_l = _it_result; + } + } + } + + tmp.resize(it_result_l - it_result_s); // shrink + + result = std::move(tmp); + + // Finish up + result.trim(); + return result; + } + + // A helper for Karatsuba multiplication to split a number in two, at n. + static std::pair karatsuba_mult_split(const uinteger_t& num, std::size_t n) { + const uinteger_t a(num, num._begin, num._begin + n); + const uinteger_t b(num, num._begin + n, num._end); + return std::make_pair(std::move(a), std::move(b)); + } + + // If rhs has at least twice the digits of lhs, and lhs is big enough that + // Karatsuba would pay off *if* the inputs had balanced sizes. + // View rhs as a sequence of slices, each with lhs.size() digits, + // and multiply the slices by lhs, one at a time. 
+ static uinteger_t& karatsuba_lopsided_mult(uinteger_t& result, const uinteger_t& lhs, const uinteger_t& rhs, std::size_t cutoff) { + auto lhs_sz = lhs.size(); + auto rhs_sz = rhs.size(); + + ASSERT(lhs_sz > cutoff); + ASSERT(2 * lhs_sz <= rhs_sz); + + auto rhs_begin = rhs._begin; + std::size_t shift = 0; + + uinteger_t r; + while (rhs_sz > 0) { + // Multiply the next slice of rhs by lhs and add into result: + auto slice_size = std::min(lhs_sz, rhs_sz); + const uinteger_t rhs_slice(rhs, rhs_begin, rhs_begin + slice_size); + uinteger_t p; + karatsuba_mult(p, lhs, rhs_slice, cutoff); + uinteger_t rs(r, shift, 0); + add(rs, rs, p); + shift += slice_size; + rhs_sz -= slice_size; + rhs_begin += slice_size; + } + + result = std::move(r); + return result; + } + + // Karatsuba multiplication + static uinteger_t& karatsuba_mult(uinteger_t& result, const uinteger_t& lhs, const uinteger_t& rhs, std::size_t cutoff = 1) { + auto lhs_sz = lhs.size(); + auto rhs_sz = rhs.size(); + + if (lhs_sz > rhs_sz) { + // rhs should be the largest: + return karatsuba_mult(result, rhs, lhs, cutoff); + } + + if (lhs_sz <= cutoff) { + return long_mult(result, lhs, rhs); + } + + // If a is too small compared to b, splitting on b gives a degenerate case + // in which Karatsuba may be (even much) less efficient than long multiplication. + if (2 * lhs_sz <= rhs_sz) { + return karatsuba_lopsided_mult(result, lhs, rhs, cutoff); + } + + // Karatsuba: + // + // A B + // x C D + // --------------------- + // AD BD + // AC BC + // --------------------- + // AC AD + BC BD + // + // AD + BC = + // AC + AD + BC + BD - AC - BD + // (A + B) (C + D) - AC - BD + + // Calculate the split point near the middle of the largest (rhs). 
+ auto shift = rhs_sz >> 1; + + // Split to get A and B: + const auto lhs_pair = karatsuba_mult_split(lhs, shift); + const auto& A = lhs_pair.second; // hi + const auto& B = lhs_pair.first; // lo + + // Split to get C and D: + const auto rhs_pair = karatsuba_mult_split(rhs, shift); + const auto& C = rhs_pair.second; // hi + const auto& D = rhs_pair.first; // lo + + // Get the pieces: + uinteger_t AC; + karatsuba_mult(AC, A, C, cutoff); + + uinteger_t BD; + karatsuba_mult(BD, B, D, cutoff); + uinteger_t AD_BC, AB, CD; + karatsuba_mult(AD_BC, A + B, C + D, cutoff); + AD_BC -= AC; + AD_BC -= BD; + + // Join the pieces, AC and BD (can't overlap) into BD: + BD.reserve(shift * 2 + AC.size()); + BD.resize(shift * 2, 0); + BD.append(AC); + + // And add AD_BC to the middle: (AC BD) + ( AD + BC ): + uinteger_t BDs(BD, shift, 0); + add(BDs, BDs, AD_BC); + + result = std::move(BD); + + // Finish up + result.trim(); + return result; + } + + static uinteger_t& mult(uinteger_t& lhs, const uinteger_t& rhs) { + // Hard to see how this could have a further optimized implementation. 
+ return mult(lhs, lhs, rhs); + } + + static uinteger_t& mult(uinteger_t& result, const uinteger_t& lhs, const uinteger_t& rhs) { + // First try saving some calculations: + if (!lhs || !rhs) { + result = uint_0(); + return result; + } + if (compare(lhs, uint_1()) == 0) { + result = rhs; + return result; + } + if (compare(rhs, uint_1()) == 0) { + result = lhs; + return result; + } + + return karatsuba_mult(result, lhs, rhs, karatsuba_cutoff); + } + + static uinteger_t mult(const uinteger_t& lhs, const uinteger_t& rhs) { + uinteger_t result; + mult(result, lhs, rhs); + return result; + } + + // Single word long division + // Fastests, but ONLY for single sized rhs + static std::pair, std::reference_wrapper> single_divmod(uinteger_t& quotient, uinteger_t& remainder, const uinteger_t& lhs, const uinteger_t& rhs) { + auto lhs_sz = lhs.size(); + auto rhs_sz = rhs.size(); + + ASSERT(rhs_sz == 1); (void)(rhs_sz); + auto n = rhs.front(); + + auto rit_lhs = lhs.rbegin(); + auto rit_lhs_e = lhs.rend(); + + auto q = uint_0(); + q.resize(lhs_sz, 0); + auto rit_q = q.rbegin(); + + digit r = 0; + for (; rit_lhs != rit_lhs_e; ++rit_lhs, ++rit_q) { + r = _divmod(r, *rit_lhs, n, &*rit_q); + } + + q.trim(); + + quotient = std::move(q); + remainder = r; + return std::make_pair(std::ref(quotient), std::ref(remainder)); + } + + // Implementation of Knuth's Algorithm D + static std::pair, std::reference_wrapper> knuth_divmod(uinteger_t& quotient, uinteger_t& remainder, const uinteger_t& lhs, const uinteger_t& rhs) { + uinteger_t v(lhs); + uinteger_t w(rhs); + + auto v_size = v.size(); + auto w_size = w.size(); + ASSERT(v_size >= w_size && w_size >= 2); + + // D1. normalize: shift rhs left so that its top digit is >= 63 bits. + // shift lhs left by the same amount. Results go into w and v. 
+ auto d = uinteger_t(digit_bits - _bits(w.back())); + v <<= d; + w <<= d; + + if (*v.rbegin() >= *w.rbegin()) { + v.append(0); + } + v_size = v.size(); + v.append(0); + + // Now *v.rbegin() < *w.rbegin() so quotient has at most + // (and usually exactly) k = v.size() - w.size() digits. + auto k = v_size - w_size; + auto q = uint_0(); + q.resize(k + 1, 0); + + auto rit_q = q.rend() - (k + 1); + + auto it_v_b = v.begin(); + auto it_v_k = it_v_b + k; + + auto it_w = w.begin(); + auto it_w_e = w.end(); + + auto rit_w = w.rbegin(); + auto wm1 = *rit_w++; + auto wm2 = *rit_w; + + // D2. inner loop: divide v[k+0..k+n] by w[0..n] + for (; it_v_k >= it_v_b; --it_v_k, ++rit_q) { + // D3. Compute estimate quotient digit q; may overestimate by 1 (rare) + digit _q; + auto _r = _divmod(*(it_v_k + w_size), *(it_v_k + w_size - 1), wm1, &_q); + digit mullo = 0; + auto mulhi = _mult(_q, wm2, &mullo); + auto rlo = *(it_v_k + w_size - 2); + while (mulhi > _r || (mulhi == _r && mullo > rlo)) { + --_q; + if (_addcarry(_r, wm1, 0, &_r)) { + break; + } + mulhi = _mult(_q, wm2, &mullo); + } + + // D4. Multiply and subtract _q * w0[0:size_w] from vk[0:size_w+1] + auto _it_v = it_v_k; + auto _it_w = it_w; + mulhi = 0; + digit carry = 0; + for (; _it_w != it_w_e; ++_it_v, ++_it_w) { + mullo = 0; + mulhi = _multadd(*_it_w, _q, 0, mulhi, &mullo); + carry = _subborrow(*_it_v, mullo, carry, &*_it_v); + } + carry = _subborrow(*_it_v, 0, carry, &*_it_v); + + if (carry) { + // D6. Add w back if q was too large (this branch taken rarely) + --_q; + + _it_v = it_v_k; + _it_w = it_w; + carry = 0; + for (; _it_w != it_w_e; ++_it_v, ++_it_w) { + carry = _addcarry(*_it_v, *_it_w, carry, &*_it_v); + } + carry = _addcarry(*_it_v, 0, carry, &*_it_v); + } + + /* store quotient digit */ + *rit_q = _q; + } + + // D8. unnormalize: unshift remainder. 
+ v.resize(w_size); + v >>= d; + + q.trim(); + v.trim(); + + quotient = std::move(q); + remainder = std::move(v); + return std::make_pair(std::ref(quotient), std::ref(remainder)); + } + + static std::pair, std::reference_wrapper> divmod(uinteger_t& quotient, uinteger_t& remainder, const uinteger_t& lhs, const uinteger_t& rhs) { + // First try saving some calculations: + if (!rhs) { + throw std::domain_error("Error: division or modulus by 0"); + } + auto lhs_sz = lhs.size(); + auto rhs_sz = rhs.size(); + if (lhs_sz == 1 && rhs_sz == 1) { + // Fast division and modulo for single value + auto a = *lhs.begin(); + auto b = *rhs.begin(); + quotient = a / b; + remainder = a % b; + return std::make_pair(std::ref(quotient), std::ref(remainder)); + } + if (compare(rhs, uint_1()) == 0) { + quotient = lhs; + remainder = uint_0(); + return std::make_pair(std::ref(quotient), std::ref(remainder)); + } + auto compared = compare(lhs, rhs); + if (compared == 0) { + quotient = uint_1(); + remainder = uint_0(); + return std::make_pair(std::ref(quotient), std::ref(remainder)); + } + if (!lhs || compared < 0) { + quotient = uint_0(); + remainder = lhs; + return std::make_pair(std::ref(quotient), std::ref(remainder)); + } + if (rhs_sz == 1) { + return single_divmod(quotient, remainder, lhs, rhs); + } + + return knuth_divmod(quotient, remainder, lhs, rhs); + } + + static std::pair divmod(const uinteger_t& lhs, const uinteger_t& rhs) { + uinteger_t quotient; + uinteger_t remainder; + divmod(quotient, remainder, lhs, rhs); + return std::make_pair(std::move(quotient), std::move(remainder)); + } + +private: + // Constructors + + template ::value and not std::is_same>::value>> + void _uint_t(const T& value) { + append(static_cast(value)); + } + + template ::value and not std::is_same>::value>> + void _uint_t(const T& value, Args... 
args) { + _uint_t(args...); + append(static_cast(value)); + } + + // This constructor creates a window view of the _value + uinteger_t(const uinteger_t& o, std::size_t begin, std::size_t end) : + _begin(begin), + _end(end), + _value(o._value), + _carry(o._carry) { } + +public: + uinteger_t() : + _begin(0), + _end(0), + _value(_value_instance), + _carry(false) { } + + uinteger_t(const uinteger_t& o) : + _begin(0), + _end(0), + _value_instance(o.begin(), o.end()), + _value(_value_instance), + _carry(o._carry) { } + + uinteger_t(uinteger_t&& o) : + _begin(std::move(o._begin)), + _end(std::move(o._end)), + _value_instance(std::move(o._value_instance)), + _value(_value_instance), + _carry(std::move(o._carry)) { } + + template ::value and not std::is_same>::value>> + uinteger_t(const T& value) : + _begin(0), + _end(0), + _value(_value_instance), + _carry(false) { + if (value) { + append(static_cast(value)); + } + } + + template ::value and not std::is_same>::value>> + uinteger_t(const T& value, Args... 
args) : + _begin(0), + _end(0), + _value(_value_instance), + _carry(false) { + _uint_t(args...); + append(static_cast(value)); + trim(); + } + + template ::value and not std::is_same>::value>> + uinteger_t(std::initializer_list list) : + _begin(0), + _end(0), + _value(_value_instance), + _carry(false) { + reserve(list.size()); + for (const auto& value : list) { + append(static_cast(value)); + } + trim(); + } + + template + explicit uinteger_t(T (&s)[N], int base=10) : + uinteger_t(s, N - 1, base) { } + + explicit uinteger_t(const unsigned char* bytes, std::size_t sz, int base) : + uinteger_t(strtouint(bytes, sz, base)) { } + + explicit uinteger_t(const char* bytes, std::size_t sz, int base) : + uinteger_t(strtouint(bytes, sz, base)) { } + + template + explicit uinteger_t(const std::vector& bytes, int base=10) : + uinteger_t(bytes.data(), bytes.size(), base) { } + + explicit uinteger_t(const std::string& bytes, int base=10) : + uinteger_t(bytes.data(), bytes.size(), base) { } + + // Assignment Operator + uinteger_t& operator=(const uinteger_t& o) { + _begin = 0; + _end = 0; + _value = container(o.begin(), o.end()); + _carry = o._carry; + return *this; + } + uinteger_t& operator=(uinteger_t&& o) { + _begin = std::move(o._begin); + _end = std::move(o._end); + _value_instance = std::move(o._value_instance); + _carry = std::move(o._carry); + return *this; + } + + // Typecast Operators + explicit operator bool() const { + return static_cast(size()); + } + explicit operator unsigned char() const { + return static_cast(size() ? front() : 0); + } + explicit operator unsigned short() const { + return static_cast(size() ? front() : 0); + } + explicit operator unsigned int() const { + return static_cast(size() ? front() : 0); + } + explicit operator unsigned long() const { + return static_cast(size() ? front() : 0); + } + explicit operator unsigned long long() const { + return static_cast(size() ? 
front() : 0); + } + explicit operator char() const { + return static_cast(size() ? front() : 0); + } + explicit operator short() const { + return static_cast(size() ? front() : 0); + } + explicit operator int() const { + return static_cast(size() ? front() : 0); + } + explicit operator long() const { + return static_cast(size() ? front() : 0); + } + explicit operator long long() const { + return static_cast(size() ? front() : 0); + } + + // Bitwise Operators + uinteger_t operator&(const uinteger_t& rhs) const { + return bitwise_and(*this, rhs); + } + + uinteger_t& operator&=(const uinteger_t& rhs) { + return bitwise_and(*this, rhs); + } + + uinteger_t operator|(const uinteger_t& rhs) const { + return bitwise_or(*this, rhs); + } + + uinteger_t& operator|=(const uinteger_t& rhs) { + return bitwise_or(*this, rhs); + } + + uinteger_t operator^(const uinteger_t& rhs) const { + return bitwise_xor(*this, rhs); + } + + uinteger_t& operator^=(const uinteger_t& rhs) { + return bitwise_xor(*this, rhs); + } + + uinteger_t operator~() const { + return bitwise_inv(*this); + } + + uinteger_t inv() { + return bitwise_inv(*this); + } + + // Bit Shift Operators + uinteger_t operator<<(const uinteger_t& rhs) const { + return bitwise_lshift(*this, rhs); + } + + uinteger_t& operator<<=(const uinteger_t& rhs) { + return bitwise_lshift(*this, rhs); + } + + uinteger_t operator>>(const uinteger_t& rhs) const { + return bitwise_rshift(*this, rhs); + } + + uinteger_t& operator>>=(const uinteger_t& rhs) { + return bitwise_rshift(*this, rhs); + } + + // Logical Operators + bool operator!() const { + return !static_cast(*this); + } + + bool operator&&(const uinteger_t& rhs) const { + return static_cast(*this) && rhs; + } + + bool operator||(const uinteger_t& rhs) const { + return static_cast(*this) || rhs; + } + + // Comparison Operators + bool operator==(const uinteger_t& rhs) const { + return compare(*this, rhs) == 0; + } + + bool operator!=(const uinteger_t& rhs) const { + return 
compare(*this, rhs) != 0; + } + + bool operator>(const uinteger_t& rhs) const { + return compare(*this, rhs) > 0; + } + + bool operator<(const uinteger_t& rhs) const { + return compare(*this, rhs) < 0; + } + + bool operator>=(const uinteger_t& rhs) const { + return compare(*this, rhs) >= 0; + } + + bool operator<=(const uinteger_t& rhs) const { + return compare(*this, rhs) <= 0; + } + + // Arithmetic Operators + uinteger_t operator+(const uinteger_t& rhs) const { + return add(*this, rhs); + } + + uinteger_t& operator+=(const uinteger_t& rhs) { + return add(*this, rhs); + } + + uinteger_t operator-(const uinteger_t& rhs) const { + return sub(*this, rhs); + } + + uinteger_t& operator-=(const uinteger_t& rhs) { + return sub(*this, rhs); + } + + uinteger_t operator*(const uinteger_t& rhs) const { + return mult(*this, rhs); + } + + uinteger_t& operator*=(const uinteger_t& rhs) { + return mult(*this, rhs); + } + + std::pair divmod(const uinteger_t& rhs) const { + return divmod(*this, rhs); + } + + uinteger_t operator/(const uinteger_t& rhs) const { + return divmod(*this, rhs).first; + } + + uinteger_t& operator/=(const uinteger_t& rhs) { + uinteger_t quotient; + uinteger_t remainder; + divmod(quotient, remainder, *this, rhs); + *this = std::move(quotient); + return *this; + } + + uinteger_t operator%(const uinteger_t& rhs) const { + return divmod(*this, rhs).second; + } + + uinteger_t& operator%=(const uinteger_t& rhs) { + uinteger_t quotient; + uinteger_t remainder; + divmod(quotient, remainder, *this, rhs); + *this = std::move(remainder); + return *this; + } + + // Increment Operator + uinteger_t& operator++() { + return *this += uint_1(); + } + uinteger_t operator++(int) { + uinteger_t temp(*this); + ++*this; + return temp; + } + + // Decrement Operator + uinteger_t& operator--() { + return *this -= uint_1(); + } + uinteger_t operator--(int) { + uinteger_t temp(*this); + --*this; + return temp; + } + + // Nothing done since promotion doesn't work here + uinteger_t 
operator+() const { + return *this; + } + + // two's complement + uinteger_t operator-() const { + return uint_0() - *this; + } + + // Get private value at index + const digit& value(std::size_t idx) const { + static const digit zero = 0; + return idx < size() ? *(begin() + idx) : zero; + } + + // Get value of bit N + bool operator[](std::size_t n) const { + auto nd = n / digit_bits; + auto nm = n % digit_bits; + return nd < size() ? (*(begin() + nd) >> nm) & 1 : 0; + } + + // Get bitsize of value + std::size_t bits() const { + auto sz = size(); + if (sz) { + return _bits(back()) + (sz - 1) * digit_bits; + } + return 0; + } + + // Get string representation of value + template ::value>> + Result str(int alphabet_base = 10) const { + auto num_sz = size(); + if (alphabet_base >= 2 && alphabet_base <= 36) { + Result result; + if (num_sz) { + auto alphabet_base_bits = base_bits(alphabet_base); + result.reserve(num_sz * base_size(alphabet_base)); + if (alphabet_base_bits) { + digit alphabet_base_mask = alphabet_base - 1; + std::size_t shift = 0; + auto ptr = reinterpret_cast(data()); + digit v = *ptr++; + v <<= half_digit_bits; + for (auto i = num_sz * 2 - 1; i; --i) { + v >>= half_digit_bits; + v |= (static_cast(*ptr++) << half_digit_bits); + do { + auto d = static_cast((v >> shift) & alphabet_base_mask); + result.push_back(chr(d)); + shift += alphabet_base_bits; + } while (shift <= half_digit_bits); + shift -= half_digit_bits; + } + v >>= (shift + half_digit_bits); + while (v) { + auto d = static_cast(v & alphabet_base_mask); + result.push_back(chr(d)); + v >>= alphabet_base_bits; + } + auto s = chr(0); + auto rit_f = std::find_if(result.rbegin(), result.rend(), [s](const char& c) { return c != s; }); + result.resize(result.rend() - rit_f); // shrink + } else { + uinteger_t uint_base = alphabet_base; + uinteger_t quotient = *this; + do { + auto r = quotient.divmod(uint_base); + auto d = static_cast(r.second); + result.push_back(chr(d)); + quotient = std::move(r.first); 
+ } while (quotient); + } + std::reverse(result.begin(), result.end()); + } else { + result.push_back(chr(0)); + } + return result; + } else if (alphabet_base == 256) { + if (num_sz) { + auto ptr = reinterpret_cast(data()); + Result result(ptr, ptr + num_sz * digit_octets); + auto rit_f = std::find_if(result.rbegin(), result.rend(), [](const char& c) { return c; }); + result.resize(result.rend() - rit_f); // shrink + std::reverse(result.begin(), result.end()); + return result; + } else { + Result result; + result.push_back('\x00'); + return result; + } + } else { + throw std::invalid_argument("Base must be in the range [2, 36]"); + } + } + + static uinteger_t strtouint(const void* encoded, std::size_t encoded_size, int alphabet_base) { + const char* data = (const char *)encoded; + uinteger_t result; + + if (alphabet_base >= 2 && alphabet_base <= 36) { + uinteger_t alphabet_base_bits = base_bits(alphabet_base); + uinteger_t uint_base = alphabet_base; + if (alphabet_base_bits) { + for (; encoded_size; --encoded_size, ++data) { + auto d = ord(static_cast(*data)); + if (d < 0) { + throw std::invalid_argument("Error: Not a digit in base " + std::to_string(alphabet_base) + ": '" + std::string(1, *data) + "' at " + std::to_string(encoded_size)); + } + result = (result << alphabet_base_bits) | d; + } + } else { + for (; encoded_size; --encoded_size, ++data) { + auto d = ord(static_cast(*data)); + if (d < 0) { + throw std::invalid_argument("Error: Not a digit in base " + std::to_string(alphabet_base) + ": '" + std::string(1, *data) + "' at " + std::to_string(encoded_size)); + } + result = (result * uint_base) + d; + } + } + } else if (encoded_size && alphabet_base == 256) { + auto value_size = encoded_size / digit_octets; + auto value_padding = encoded_size % digit_octets; + if (value_padding) { + value_padding = digit_octets - value_padding; + ++value_size; + } + result.resize(value_size); // grow (no initialization) + *result.begin() = 0; // initialize value + auto ptr = 
reinterpret_cast(result.data()); + std::copy(data, data + encoded_size, ptr + value_padding); + std::reverse(ptr, ptr + value_size * digit_octets); + } else { + throw std::invalid_argument("Error: Cannot convert from base " + std::to_string(alphabet_base)); + } + + return result; + } + + template ::value>> + Result bin() const { + return str(2); + } + + template ::value>> + Result oct() const { + return str(8); + } + + template ::value>> + Result hex() const { + return str(16); + } + + template ::value>> + Result raw() const { + return str(256); + } +}; + +namespace std { // This is probably not a good idea + // Make it work with std::string() + inline std::string to_string(uinteger_t& num) { + return num.str(); + } + inline const std::string to_string(const uinteger_t& num) { + return num.str(); + } +} + +// lhs type T as first arguemnt +// If the output is not a bool, casts to type T + +// Bitwise Operators +template ::value and not std::is_same>::value>> +uinteger_t operator&(const T& lhs, const uinteger_t& rhs) { + return uinteger_t(lhs) & rhs; +} + +template ::value and not std::is_same>::value>> +T& operator&=(T& lhs, const uinteger_t& rhs) { + return lhs = static_cast(rhs & lhs); +} + +template ::value and not std::is_same>::value>> +uinteger_t operator|(const T& lhs, const uinteger_t& rhs) { + return uinteger_t(lhs) | rhs; +} + +template ::value and not std::is_same>::value>> +T& operator|=(T& lhs, const uinteger_t& rhs) { + return lhs = static_cast(rhs | lhs); +} + +template ::value and not std::is_same>::value>> +uinteger_t operator^(const T& lhs, const uinteger_t& rhs) { + return uinteger_t(lhs) ^ rhs; +} + +template ::value and not std::is_same>::value>> +T& operator^=(T& lhs, const uinteger_t& rhs) { + return lhs = static_cast(rhs ^ lhs); +} + +// Bitshift operators +template ::value and not std::is_same>::value>> +inline uinteger_t operator<<(T& lhs, const uinteger_t& rhs) { + return uinteger_t(lhs) << rhs; +} + +template ::value and not 
std::is_same>::value>> +T& operator<<=(T& lhs, const uinteger_t& rhs) { + return lhs = static_cast(lhs << rhs); +} + +template ::value and not std::is_same>::value>> +inline uinteger_t operator>>(T& lhs, const uinteger_t& rhs) { + return uinteger_t(lhs) >> rhs; +} + +template ::value and not std::is_same>::value>> +T& operator>>=(T& lhs, const uinteger_t& rhs) { + return lhs = static_cast(lhs >> rhs); +} + +// Comparison Operators +template ::value and not std::is_same>::value>> +bool operator==(const T& lhs, const uinteger_t& rhs) { + return uinteger_t(lhs) == rhs; +} + +template ::value and not std::is_same>::value>> +bool operator!=(const T& lhs, const uinteger_t& rhs) { + return uinteger_t(lhs) != rhs; +} + +template ::value and not std::is_same>::value>> +bool operator>(const T& lhs, const uinteger_t& rhs) { + return uinteger_t(lhs) > rhs; +} + +template ::value and not std::is_same>::value>> +bool operator<(const T& lhs, const uinteger_t& rhs) { + return uinteger_t(lhs) < rhs; +} + +template ::value and not std::is_same>::value>> +bool operator>=(const T& lhs, const uinteger_t& rhs) { + return uinteger_t(lhs) >= rhs; +} + +template ::value and not std::is_same>::value>> +bool operator<=(const T& lhs, const uinteger_t& rhs) { + return uinteger_t(lhs) <= rhs; +} + +// Arithmetic Operators +template ::value and not std::is_same>::value>> +uinteger_t operator+(const T& lhs, const uinteger_t& rhs) { + return uinteger_t(lhs) + rhs; +} + +template ::value and not std::is_same>::value>> +T& operator+=(T& lhs, const uinteger_t& rhs) { + return lhs = static_cast(rhs + lhs); +} + +template ::value and not std::is_same>::value>> +uinteger_t operator-(const T& lhs, const uinteger_t& rhs) { + return uinteger_t(lhs) - rhs; +} + +template ::value and not std::is_same>::value>> +T& operator-=(T& lhs, const uinteger_t& rhs) { + return lhs = static_cast(lhs - rhs); +} + +template ::value and not std::is_same>::value>> +uinteger_t operator*(const T& lhs, const uinteger_t& rhs) { 
+ return uinteger_t(lhs) * rhs; +} + +template ::value and not std::is_same>::value>> +T& operator*=(T& lhs, const uinteger_t& rhs) { + return lhs = static_cast(rhs * lhs); +} + +template ::value and not std::is_same>::value>> +uinteger_t operator/(const T& lhs, const uinteger_t& rhs) { + return uinteger_t(lhs) / rhs; +} + +template ::value and not std::is_same>::value>> +T& operator/=(T& lhs, const uinteger_t& rhs) { + return lhs = static_cast(lhs / rhs); +} + +template ::value and not std::is_same>::value>> +uinteger_t operator%(const T& lhs, const uinteger_t& rhs) { + return uinteger_t(lhs) % rhs; +} + +template ::value and not std::is_same>::value>> +T& operator%=(T& lhs, const uinteger_t& rhs) { + return lhs = static_cast(lhs % rhs); +} + +// IO Operator +inline std::ostream& operator<<(std::ostream& stream, const uinteger_t& rhs) { + if (stream.flags() & stream.oct) { + stream << rhs.str(8); + } else if (stream.flags() & stream.dec) { + stream << rhs.str(10); + } else if (stream.flags() & stream.hex) { + stream << rhs.str(16); + } + return stream; +} + +#endif diff --git a/contrib/base58 b/contrib/base58 deleted file mode 160000 index a85f98fb4ed..00000000000 --- a/contrib/base58 +++ /dev/null @@ -1 +0,0 @@ -Subproject commit a85f98fb4ed52c2f4029a4b6ac1ef0bafdfc56f5 diff --git a/src/Functions/CMakeLists.txt b/src/Functions/CMakeLists.txt index bf72795aae0..32f7952961c 100644 --- a/src/Functions/CMakeLists.txt +++ b/src/Functions/CMakeLists.txt @@ -72,8 +72,8 @@ if (TARGET ch_contrib::llvm) target_link_libraries(clickhouse_functions PRIVATE ch_contrib::llvm) endif () -if (TARGET ch_contrib::base58) - target_link_libraries(clickhouse_functions PRIVATE ch_contrib::base58) +if (TARGET ch_contrib::base-x) + target_link_libraries(clickhouse_functions PRIVATE ch_contrib::base-x) endif() if (TARGET ch_contrib::base64) diff --git a/src/Functions/FunctionBase58Conversion.h b/src/Functions/FunctionBase58Conversion.h index a0431ca47df..c0c54fd0ba9 100644 --- 
a/src/Functions/FunctionBase58Conversion.h +++ b/src/Functions/FunctionBase58Conversion.h @@ -26,19 +26,19 @@ struct Base58Encode { static constexpr auto name = "base58Encode"; - static void process(const ColumnString * input, ColumnString::MutablePtr& dst_column, std::string& alphabet, size_t input_rows_count) + static void process(const ColumnString& input, ColumnString::MutablePtr& dst_column, std::string& alphabet, size_t input_rows_count) { auto & dst_data = dst_column->getChars(); auto & dst_offsets = dst_column->getOffsets(); - size_t current_allocated_size = input->getChars().size(); + size_t current_allocated_size = input.getChars().size(); dst_data.resize(current_allocated_size); dst_offsets.resize(input_rows_count); - const ColumnString::Offsets & src_offsets = input->getOffsets(); + const ColumnString::Offsets & src_offsets = input.getOffsets(); - const auto * source = input->getChars().raw_data(); + const auto * source = input.getChars().raw_data(); auto * dst = dst_data.data(); auto * dst_pos = dst; @@ -48,8 +48,9 @@ struct Base58Encode const auto& encoder = (alphabet == "bitcoin") ? Base58::bitcoin() : ((alphabet == "flickr") ? Base58::flickr() : ((alphabet == "ripple") ? 
Base58::ripple() : - Base58::base58())); + Base58::base58())); //GMP + std::string encoded; for (size_t row = 0; row < input_rows_count; ++row) { size_t srclen = src_offsets[row] - src_offset_prev - 1; @@ -57,7 +58,7 @@ struct Base58Encode /// We don't know the size of the result string beforehand (it's not byte-to-byte encoding), /// so we may need to do many resizes (the worst case -- we'll do it for each row) /// This way we do exponential resizes and one final resize after whole operation is complete - std::string encoded; + encoded.clear(); if (srclen) encoder.encode(encoded, source, srclen); size_t outlen = encoded.size(); @@ -66,14 +67,15 @@ struct Base58Encode { current_allocated_size += current_allocated_size; dst_data.resize(current_allocated_size); + auto processed_offset = dst_pos - dst; + dst = dst_data.data(); + dst_pos = dst; + dst_pos += processed_offset; } - if (srclen) - std::strcpy(reinterpret_cast(dst_pos), encoded.c_str()); + std::memcpy(dst_pos, encoded.c_str(), ++outlen); source += srclen + 1; dst_pos += outlen; - *dst_pos = '\0'; - dst_pos += 1; dst_offsets[row] = dst_pos - dst; src_offset_prev = src_offsets[row]; @@ -88,19 +90,19 @@ struct Base58Decode { static constexpr auto name = "base58Decode"; - static void process(const ColumnString * input, ColumnString::MutablePtr& dst_column, std::string& alphabet, size_t input_rows_count) + static void process(const ColumnString& input, ColumnString::MutablePtr& dst_column, std::string& alphabet, size_t input_rows_count) { auto & dst_data = dst_column->getChars(); auto & dst_offsets = dst_column->getOffsets(); - size_t current_allocated_size = input->getChars().size(); + size_t current_allocated_size = input.getChars().size(); dst_data.resize(current_allocated_size); dst_offsets.resize(input_rows_count); - const ColumnString::Offsets & src_offsets = input->getOffsets(); + const ColumnString::Offsets & src_offsets = input.getOffsets(); - const auto * source = input->getChars().raw_data(); + const 
auto * source = input.getChars().raw_data(); auto * dst = dst_data.data(); auto * dst_pos = dst; @@ -112,6 +114,7 @@ struct Base58Decode ((alphabet == "ripple") ? Base58::ripple() : Base58::base58())); + std::string decoded; for (size_t row = 0; row < input_rows_count; ++row) { size_t srclen = src_offsets[row] - src_offset_prev - 1; @@ -119,21 +122,24 @@ struct Base58Decode /// We don't know the size of the result string beforehand (it's not byte-to-byte encoding), /// so we may need to do many resizes (the worst case -- we'll do it for each row) /// This way we do exponential resizes and one final resize after whole operation is complete - std::string decoded; - decoder.decode(decoded, source, srclen); + decoded.clear(); + if (srclen) + decoder.decode(decoded, source, srclen); size_t outlen = decoded.size(); if (processed_size + outlen >= current_allocated_size) { current_allocated_size += current_allocated_size; dst_data.resize(current_allocated_size); + auto processed_offset = dst_pos - dst; + dst = dst_data.data(); + dst_pos = dst; + dst_pos += processed_offset; } - std::strcpy(reinterpret_cast(dst_pos), decoded.c_str()); + std::memcpy(dst_pos, decoded.c_str(), ++outlen); source += srclen + 1; dst_pos += outlen; - *dst_pos = '\0'; - dst_pos += 1; dst_offsets[row] = dst_pos - dst; src_offset_prev = src_offsets[row]; @@ -216,7 +222,7 @@ public: auto dst_column = ColumnString::create(); - Func::process(input, dst_column, alphabet, input_rows_count); + Func::process(*input, dst_column, alphabet, input_rows_count); return dst_column; } diff --git a/src/Functions/configure_config.cmake b/src/Functions/configure_config.cmake index 776996d7e17..856d9a5682a 100644 --- a/src/Functions/configure_config.cmake +++ b/src/Functions/configure_config.cmake @@ -1,7 +1,7 @@ if (TARGET ch_contrib::fastops) set(USE_FASTOPS 1) endif() -if (TARGET ch_contrib::base58) +if (TARGET ch_contrib::base-x) set(USE_BASE58 1) endif() if (TARGET ch_contrib::base64) diff --git 
a/tests/queries/0_stateless/02337_base58.reference b/tests/queries/0_stateless/02337_base58.reference new file mode 100644 index 00000000000..718dfeb4a34 --- /dev/null +++ b/tests/queries/0_stateless/02337_base58.reference @@ -0,0 +1,48 @@ +32YCBjgZhV4AdCWHaCDNu + +f +fo +foo +foob +fooba +foobar +Hello world! + +f +fo +foo +foob +fooba +foobar +Hello world! + +f +fo +foo +foob +fooba +foobar +Hello world! + +f +fo +foo +foob +fooba +foobar +Hello world! + +2m +8o8 +bQbp +3csAg9 +CZJRhmz +t1Zv2yaZ + +f +fo +foo +foob +fooba +foobar +1 1 diff --git a/tests/queries/0_stateless/02337_base58.sql b/tests/queries/0_stateless/02337_base58.sql new file mode 100644 index 00000000000..b67993d2cc9 --- /dev/null +++ b/tests/queries/0_stateless/02337_base58.sql @@ -0,0 +1,17 @@ +-- Tags: no-fasttest + +SET send_logs_level = 'fatal'; + +SELECT base58Encode('Hold my beer...'); + +SELECT base58Decode(encoded, 'gmp') FROM (SELECT base58Encode(val, 'gmp') as encoded FROM (select arrayJoin(['', 'f', 'fo', 'foo', 'foob', 'fooba', 'foobar', 'Hello world!']) val)); +SELECT base58Decode(encoded, 'ripple') FROM (SELECT base58Encode(val, 'ripple') as encoded FROM (select arrayJoin(['', 'f', 'fo', 'foo', 'foob', 'fooba', 'foobar', 'Hello world!']) val)); +SELECT base58Decode(encoded, 'flickr') FROM (SELECT base58Encode(val, 'flickr') as encoded FROM (select arrayJoin(['', 'f', 'fo', 'foo', 'foob', 'fooba', 'foobar', 'Hello world!']) val)); +SELECT base58Decode(encoded, 'bitcoin') FROM (SELECT base58Encode(val, 'bitcoin') as encoded FROM (select arrayJoin(['', 'f', 'fo', 'foo', 'foob', 'fooba', 'foobar', 'Hello world!']) val)); + +SELECT base58Encode(val) FROM (select arrayJoin(['', 'f', 'fo', 'foo', 'foob', 'fooba', 'foobar']) val); +SELECT base58Decode(val) FROM (select arrayJoin(['', '2m', '8o8', 'bQbp', '3csAg9', 'CZJRhmz', 't1Zv2yaZ']) val); + +SELECT base58Decode(base58Encode('foo')) = 'foo', base58Encode(base58Decode('bQbp')) == 'bQbp'; + +SELECT base58Decode('Why_not?'); -- { 
serverError 1001 } \ No newline at end of file From 78d55d6f46ab3aee8cfdce1bb17e131dba54f6eb Mon Sep 17 00:00:00 2001 From: zvonand Date: Mon, 20 Jun 2022 19:30:54 +0500 Subject: [PATCH 008/101] small fixes --- .gitmodules | 2 +- src/Functions/FunctionBase58Conversion.h | 14 ++++++++------ 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/.gitmodules b/.gitmodules index e1960d2144b..a0ad0b69822 100644 --- a/.gitmodules +++ b/.gitmodules @@ -270,4 +270,4 @@ url = https://github.com/schoentoon/hashidsxx.git [submodule "contrib/base-x"] path = contrib/base-x - url = https://github.com/Kronuz/base-x.git + url = https://github.com/ClickHouse/base-x.git diff --git a/src/Functions/FunctionBase58Conversion.h b/src/Functions/FunctionBase58Conversion.h index c0c54fd0ba9..e472e997c32 100644 --- a/src/Functions/FunctionBase58Conversion.h +++ b/src/Functions/FunctionBase58Conversion.h @@ -26,12 +26,13 @@ struct Base58Encode { static constexpr auto name = "base58Encode"; - static void process(const ColumnString& input, ColumnString::MutablePtr& dst_column, std::string& alphabet, size_t input_rows_count) + static void process(const ColumnString & input, ColumnString::MutablePtr & dst_column, const std::string & alphabet, size_t input_rows_count) { auto & dst_data = dst_column->getChars(); auto & dst_offsets = dst_column->getOffsets(); - size_t current_allocated_size = input.getChars().size(); + /// Wikipedia states Base58 has efficiency of 73%, and we take 1.5 scale to avoid reallocation in most cases + size_t current_allocated_size = ceil(1.5 * input.getChars().size()); dst_data.resize(current_allocated_size); dst_offsets.resize(input_rows_count); @@ -90,11 +91,12 @@ struct Base58Decode { static constexpr auto name = "base58Decode"; - static void process(const ColumnString& input, ColumnString::MutablePtr& dst_column, std::string& alphabet, size_t input_rows_count) + static void process(const ColumnString & input, ColumnString::MutablePtr & dst_column, const 
std::string & alphabet, size_t input_rows_count) { auto & dst_data = dst_column->getChars(); auto & dst_offsets = dst_column->getOffsets(); + /// We allocate probably even more then needed to avoid many resizes size_t current_allocated_size = input.getChars().size(); dst_data.resize(current_allocated_size); @@ -202,7 +204,7 @@ public: const ColumnString * input = checkAndGetColumn(column_string.get()); if (!input) throw Exception( - "Illegal column " + arguments[0].column->getName() + " of first argument of function " + getName(), + "Illegal column " + arguments[0].column->getName() + " of first argument of function " + getName() + ", must be String", ErrorCodes::ILLEGAL_COLUMN); std::string alphabet = "bitcoin"; @@ -214,8 +216,8 @@ public: if (!alphabet_column) throw Exception("Second argument for function " + getName() + " must be constant String", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - if (alphabet = alphabet_column->getValue(); - alphabet != "bitcoin" && alphabet != "ripple" && alphabet != "flickr" && alphabet != "gmp") + alphabet = alphabet_column->getValue(); + if (alphabet != "bitcoin" && alphabet != "ripple" && alphabet != "flickr" && alphabet != "gmp") throw Exception("Second argument for function " + getName() + " must be 'bitcoin', 'ripple', 'gmp' or 'flickr'", ErrorCodes::ILLEGAL_COLUMN); } From 9c6b2b9ba075d61a022f02e5b49cf59cfa03450d Mon Sep 17 00:00:00 2001 From: zvonand Date: Mon, 20 Jun 2022 20:10:28 +0500 Subject: [PATCH 009/101] added docs --- docs/en/development/contrib.md | 1 + .../functions/encoding-functions.md | 43 ++++++++++++++++++ docs/ru/development/contrib.md | 1 + .../functions/encoding-functions.md | 44 +++++++++++++++++++ 4 files changed, 89 insertions(+) diff --git a/docs/en/development/contrib.md b/docs/en/development/contrib.md index 7713c397e46..831923dc43a 100644 --- a/docs/en/development/contrib.md +++ b/docs/en/development/contrib.md @@ -18,6 +18,7 @@ The list of third-party libraries: | aws-c-common | 
[Apache](https://github.com/ClickHouse-Extras/aws-c-common/blob/736a82d1697c108b04a277e66438a7f4e19b6857/LICENSE) | | aws-c-event-stream | [Apache](https://github.com/ClickHouse-Extras/aws-c-event-stream/blob/3bc33662f9ccff4f4cbcf9509cc78c26e022fde0/LICENSE) | | aws-checksums | [Apache](https://github.com/ClickHouse-Extras/aws-checksums/blob/519d6d9093819b6cf89ffff589a27ef8f83d0f65/LICENSE) | +| base58 | [MIT](https://github.com/ClickHouse/base-x/blob/3e58874643c087f57e82b0ff03825c933fab945a/LICENSE) | | base64 | [BSD 2-clause](https://github.com/ClickHouse-Extras/Turbo-Base64/blob/af9b331f2b4f30b41c70f3a571ff904a8251c1d3/LICENSE) | | boost | [Boost](https://github.com/ClickHouse-Extras/boost/blob/9cf09dbfd55a5c6202dedbdf40781a51b02c2675/LICENSE_1_0.txt) | | boringssl | [BSD](https://github.com/ClickHouse-Extras/boringssl/blob/a6a2e2ab3e44d97ce98e51c558e989f211de7eb3/LICENSE) | diff --git a/docs/en/sql-reference/functions/encoding-functions.md b/docs/en/sql-reference/functions/encoding-functions.md index 4ee71267a09..42a6d75952c 100644 --- a/docs/en/sql-reference/functions/encoding-functions.md +++ b/docs/en/sql-reference/functions/encoding-functions.md @@ -440,3 +440,46 @@ Result: │ [0,1,2,3,4,5,6,7] │ └───────────────────┘ ``` + +## Base58Encode(plaintext[, alphabet_name]) +## Base58Decode(plaintext[, alphabet_name]) + +Accepts a String and encodes/decodes it using [Base58](https://tools.ietf.org/id/draft-msporny-base58-01.html) encoding scheme using specified alphabet. + +**Syntax** + +```sql +base58Encode(decoded[, alphabet_name]) +base58Decode(encoded[, alphabet_name]) +``` + +**Arguments** + +- `decoded` — [String](../../sql-reference/data-types/string.md) column or constant. +- `encoded` — [String](../../sql-reference/data-types/string.md) column or constant. If the string is not a valid base58-encoded value, `1001 Exception` will be thrown. +- `alphabet_name` — String constant. Specifies alphabet used for encoding. 
Possible values: `gmp`, `bitcoin`, `ripple`, `flickr`. Default: `gmp`. + +**Returned value** + +- A string containing encoded/decoded value of 1st argument. + +Type: [String](../../sql-reference/data-types/string.md). + +**Example** + +Query: + +``` sql +SELECT base58Encode('encode', 'flickr'); +SELECT base58Decode('izCFiDUY', 'ripple'); +``` + +Result: +```text +┌─base58Encode('encode', 'flickr')─┐ +│ SvyTHb1D │ +└──────────────────────────────────┘ +┌─base58Decode('izCFiDUY', 'ripple')─┐ +│ decode │ +└────────────────────────────────────┘ +``` diff --git a/docs/ru/development/contrib.md b/docs/ru/development/contrib.md index 1b99ec97553..0f4d22e90ce 100644 --- a/docs/ru/development/contrib.md +++ b/docs/ru/development/contrib.md @@ -18,6 +18,7 @@ sidebar_label: "Используемые сторонние библиотеки | aws-c-common | [Apache](https://github.com/ClickHouse-Extras/aws-c-common/blob/736a82d1697c108b04a277e66438a7f4e19b6857/LICENSE) | | aws-c-event-stream | [Apache](https://github.com/ClickHouse-Extras/aws-c-event-stream/blob/3bc33662f9ccff4f4cbcf9509cc78c26e022fde0/LICENSE) | | aws-checksums | [Apache](https://github.com/ClickHouse-Extras/aws-checksums/blob/519d6d9093819b6cf89ffff589a27ef8f83d0f65/LICENSE) | +| base58 | [MIT](https://github.com/ClickHouse/base-x/blob/3e58874643c087f57e82b0ff03825c933fab945a/LICENSE) | | base64 | [BSD 2-clause](https://github.com/ClickHouse-Extras/Turbo-Base64/blob/af9b331f2b4f30b41c70f3a571ff904a8251c1d3/LICENSE) | | boost | [Boost](https://github.com/ClickHouse-Extras/boost/blob/9cf09dbfd55a5c6202dedbdf40781a51b02c2675/LICENSE_1_0.txt) | | boringssl | [BSD](https://github.com/ClickHouse-Extras/boringssl/blob/a6a2e2ab3e44d97ce98e51c558e989f211de7eb3/LICENSE) | diff --git a/docs/ru/sql-reference/functions/encoding-functions.md b/docs/ru/sql-reference/functions/encoding-functions.md index 65d2b0e6538..255985fcc92 100644 --- a/docs/ru/sql-reference/functions/encoding-functions.md +++ b/docs/ru/sql-reference/functions/encoding-functions.md 
@@ -404,3 +404,47 @@ SELECT bitPositionsToArray(toInt8(-1)) AS bit_positions; │ [0,1,2,3,4,5,6,7] │ └───────────────────┘ ``` + +## Base58Encode(plaintext[, alphabet_name]) +## Base58Decode(plaintext[, alphabet_name]) + +Принимает на вход строку или колонку строк и кодирует/раскодирует их с помощью схемы кодирования [Base58](https://tools.ietf.org/id/draft-msporny-base58-01.html) с использованием указанного алфавита. + +**Синтаксис** + +```sql +base58Encode(decoded[, alphabet_name]) +base58Decode(encoded[, alphabet_name]) +``` + +**Аргументы** + +- `decoded` — Колонка или строка типа [String](../../sql-reference/data-types/string.md). +- `encoded` — Колонка или строка типа [String](../../sql-reference/data-types/string.md). Если входная строка не является корректным кодом для какой-либо другой строки, возникнет исключение `1001`. +- `alphabet_name` — Строковая константа. Указывает алфавит, для которого необходимо получить код. Может принимать одно из следующих значений: `gmp`, `bitcoin`, `ripple`, `flickr`. По умолчанию: `gmp`. + +**Возвращаемое значение** + +- Строка, содержащая раскодированный/закодированный первый аргумент. + +Тип: [String](../../sql-reference/data-types/string.md). 
+ +**Пример:** + +Запрос: + +``` sql +SELECT base58Encode('encode', 'flickr'); +SELECT base58Decode('izCFiDUY', 'ripple'); +``` + +Результат: +```text +┌─base58Encode('encode', 'flickr')─┐ +│ SvyTHb1D │ +└──────────────────────────────────┘ +┌─base58Decode('izCFiDUY', 'ripple')─┐ +│ decode │ +└────────────────────────────────────┘ +``` + From d4e5686b997660b7943d0e6760b6f11baf91acf9 Mon Sep 17 00:00:00 2001 From: zvonand Date: Mon, 20 Jun 2022 20:13:09 +0500 Subject: [PATCH 010/101] minor: fix message for base64 --- src/Functions/FunctionBase64Conversion.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Functions/FunctionBase64Conversion.h b/src/Functions/FunctionBase64Conversion.h index a1d6b966660..87a3309f7ef 100644 --- a/src/Functions/FunctionBase64Conversion.h +++ b/src/Functions/FunctionBase64Conversion.h @@ -85,7 +85,7 @@ public: { if (!WhichDataType(arguments[0].type).isString()) throw Exception( - "Illegal type " + arguments[0].type->getName() + " of 1 argument of function " + getName() + ". Must be String.", + "Illegal type " + arguments[0].type->getName() + " of 1st argument of function " + getName() + ". 
Must be String.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); return std::make_shared(); @@ -98,7 +98,7 @@ public: if (!input) throw Exception( - "Illegal column " + arguments[0].column->getName() + " of first argument of function " + getName(), + "Illegal column " + arguments[0].column->getName() + " of first argument of function " + getName() + ", must be of type String", ErrorCodes::ILLEGAL_COLUMN); auto dst_column = ColumnString::create(); From ad2b9cc4e4dcb740f42976e0e0ceffbc8eaf45b6 Mon Sep 17 00:00:00 2001 From: zvonand Date: Mon, 20 Jun 2022 23:38:45 +0500 Subject: [PATCH 011/101] upd tests --- tests/queries/0_stateless/02337_base58.reference | 1 - tests/queries/0_stateless/02337_base58.sql | 6 +++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/tests/queries/0_stateless/02337_base58.reference b/tests/queries/0_stateless/02337_base58.reference index 718dfeb4a34..f4a2c95304b 100644 --- a/tests/queries/0_stateless/02337_base58.reference +++ b/tests/queries/0_stateless/02337_base58.reference @@ -45,4 +45,3 @@ foo foob fooba foobar -1 1 diff --git a/tests/queries/0_stateless/02337_base58.sql b/tests/queries/0_stateless/02337_base58.sql index b67993d2cc9..68dac97a20b 100644 --- a/tests/queries/0_stateless/02337_base58.sql +++ b/tests/queries/0_stateless/02337_base58.sql @@ -3,6 +3,8 @@ SET send_logs_level = 'fatal'; SELECT base58Encode('Hold my beer...'); +SELECT base58Encode('Hold my beer...', ''); -- { serverError 44 } +SELECT base58Encode('Hold my beer...', 'gmp', 'third'); -- { serverError 36 } SELECT base58Decode(encoded, 'gmp') FROM (SELECT base58Encode(val, 'gmp') as encoded FROM (select arrayJoin(['', 'f', 'fo', 'foo', 'foob', 'fooba', 'foobar', 'Hello world!']) val)); SELECT base58Decode(encoded, 'ripple') FROM (SELECT base58Encode(val, 'ripple') as encoded FROM (select arrayJoin(['', 'f', 'fo', 'foo', 'foob', 'fooba', 'foobar', 'Hello world!']) val)); @@ -12,6 +14,4 @@ SELECT base58Decode(encoded, 'bitcoin') FROM (SELECT base58Encode(val, 
'bitcoin' SELECT base58Encode(val) FROM (select arrayJoin(['', 'f', 'fo', 'foo', 'foob', 'fooba', 'foobar']) val); SELECT base58Decode(val) FROM (select arrayJoin(['', '2m', '8o8', 'bQbp', '3csAg9', 'CZJRhmz', 't1Zv2yaZ']) val); -SELECT base58Decode(base58Encode('foo')) = 'foo', base58Encode(base58Decode('bQbp')) == 'bQbp'; - -SELECT base58Decode('Why_not?'); -- { serverError 1001 } \ No newline at end of file +SELECT base58Decode('Why_not?'); -- { serverError 1001 } From 22af00b757f14b30c3912c57ad7ccbe1ea25e6c9 Mon Sep 17 00:00:00 2001 From: zvonand Date: Mon, 20 Jun 2022 23:53:47 +0500 Subject: [PATCH 012/101] rename variable + fix handling of ENABLE_LIBRARIES --- contrib/base-x-cmake/CMakeLists.txt | 7 +++++++ src/Functions/FunctionBase58Conversion.h | 2 +- src/Functions/FunctionsBase58.cpp | 3 +-- src/Functions/config_functions.h.in | 2 +- src/Functions/configure_config.cmake | 2 +- src/Functions/registerFunctionsString.cpp | 4 ++-- src/configure_config.cmake | 4 ++-- 7 files changed, 15 insertions(+), 9 deletions(-) diff --git a/contrib/base-x-cmake/CMakeLists.txt b/contrib/base-x-cmake/CMakeLists.txt index 48cb54d307f..ab5696c9fb6 100644 --- a/contrib/base-x-cmake/CMakeLists.txt +++ b/contrib/base-x-cmake/CMakeLists.txt @@ -1,3 +1,10 @@ +option (ENABLE_BASEX "Enable base-x" ${ENABLE_LIBRARIES}) + +if (NOT ENABLE_BASEX) + message(STATUS "Not using base-x") + return() +endif() + set(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/base-x") set (SRCS diff --git a/src/Functions/FunctionBase58Conversion.h b/src/Functions/FunctionBase58Conversion.h index e472e997c32..ed4667aa63b 100644 --- a/src/Functions/FunctionBase58Conversion.h +++ b/src/Functions/FunctionBase58Conversion.h @@ -1,7 +1,7 @@ #pragma once #include "config_functions.h" -#if USE_BASE58 +#if USE_BASEX # include # include # include diff --git a/src/Functions/FunctionsBase58.cpp b/src/Functions/FunctionsBase58.cpp index 3ccb4d790ce..dc325ff8cfc 100644 --- a/src/Functions/FunctionsBase58.cpp +++ 
b/src/Functions/FunctionsBase58.cpp @@ -1,7 +1,6 @@ #include -#if USE_BASE58 +#if USE_BASEX #include -#include namespace DB { diff --git a/src/Functions/config_functions.h.in b/src/Functions/config_functions.h.in index 001712d5cef..0bfea78922b 100644 --- a/src/Functions/config_functions.h.in +++ b/src/Functions/config_functions.h.in @@ -2,7 +2,7 @@ // .h autogenerated by cmake! -#cmakedefine01 USE_BASE58 +#cmakedefine01 USE_BASEX #cmakedefine01 USE_BASE64 #cmakedefine01 USE_SIMDJSON #cmakedefine01 USE_RAPIDJSON diff --git a/src/Functions/configure_config.cmake b/src/Functions/configure_config.cmake index 856d9a5682a..1038c09e53f 100644 --- a/src/Functions/configure_config.cmake +++ b/src/Functions/configure_config.cmake @@ -2,7 +2,7 @@ if (TARGET ch_contrib::fastops) set(USE_FASTOPS 1) endif() if (TARGET ch_contrib::base-x) - set(USE_BASE58 1) + set(USE_BASEX 1) endif() if (TARGET ch_contrib::base64) set(USE_BASE64 1) diff --git a/src/Functions/registerFunctionsString.cpp b/src/Functions/registerFunctionsString.cpp index 43035ef51e7..248b6391b4f 100644 --- a/src/Functions/registerFunctionsString.cpp +++ b/src/Functions/registerFunctionsString.cpp @@ -49,7 +49,7 @@ void registerFunctionBase64Decode(FunctionFactory &); void registerFunctionTryBase64Decode(FunctionFactory &); #endif -#if USE_BASE58 +#if USE_BASEX void registerFunctionBase58Encode(FunctionFactory &); void registerFunctionBase58Decode(FunctionFactory &); #endif @@ -110,7 +110,7 @@ void registerFunctionsString(FunctionFactory & factory) registerFunctionTryBase64Decode(factory); #endif -#if USE_BASE58 +#if USE_BASEX registerFunctionBase58Encode(factory); registerFunctionBase58Decode(factory); #endif diff --git a/src/configure_config.cmake b/src/configure_config.cmake index fc2a858e75a..45e45b505d4 100644 --- a/src/configure_config.cmake +++ b/src/configure_config.cmake @@ -55,8 +55,8 @@ endif() if (TARGET ch_contrib::base64) set(USE_BASE64 1) endif() -if (TARGET ch_contrib::base58) - set(USE_BASE58 1) +if 
(TARGET ch_contrib::base-x) + set(USE_BASEX 1) endif() if (TARGET ch_contrib::yaml_cpp) set(USE_YAML_CPP 1) From 481644939a91189163059c64bdca5e6801b18a13 Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 21 Jun 2022 15:42:36 +0200 Subject: [PATCH 013/101] Add describe cache command --- src/Common/FileCacheFactory.cpp | 33 ++++++++--- src/Common/FileCacheFactory.h | 19 +++--- .../DiskObjectStorageCommon.cpp | 2 +- .../InterpreterDescribeCacheQuery.cpp | 58 +++++++++++++++++++ .../InterpreterDescribeCacheQuery.h | 22 +++++++ src/Interpreters/InterpreterFactory.cpp | 6 ++ src/Parsers/ASTDescribeCacheQuery.h | 27 +++++++++ src/Parsers/ParserDescribeCacheQuery.cpp | 34 +++++++++++ src/Parsers/ParserDescribeCacheQuery.h | 20 +++++++ src/Parsers/ParserQueryWithOutput.cpp | 3 + 10 files changed, 209 insertions(+), 15 deletions(-) create mode 100644 src/Interpreters/InterpreterDescribeCacheQuery.cpp create mode 100644 src/Interpreters/InterpreterDescribeCacheQuery.h create mode 100644 src/Parsers/ASTDescribeCacheQuery.h create mode 100644 src/Parsers/ParserDescribeCacheQuery.cpp create mode 100644 src/Parsers/ParserDescribeCacheQuery.h diff --git a/src/Common/FileCacheFactory.cpp b/src/Common/FileCacheFactory.cpp index e126ac014f2..b2dc2739d8a 100644 --- a/src/Common/FileCacheFactory.cpp +++ b/src/Common/FileCacheFactory.cpp @@ -1,5 +1,4 @@ #include "FileCacheFactory.h" -#include "IFileCache.h" #include "LRUFileCache.h" namespace DB @@ -19,7 +18,7 @@ FileCacheFactory & FileCacheFactory::instance() FileCacheFactory::CacheByBasePath FileCacheFactory::getAll() { std::lock_guard lock(mutex); - return caches; + return caches_by_path; } const FileCacheSettings & FileCacheFactory::getSettings(const std::string & cache_base_path) @@ -33,10 +32,10 @@ const FileCacheSettings & FileCacheFactory::getSettings(const std::string & cach throw Exception(ErrorCodes::BAD_ARGUMENTS, "No cache found by path: {}", cache_base_path); } -FileCacheFactory::CacheData * FileCacheFactory::getImpl(const 
std::string & cache_base_path, std::lock_guard &) +FileCacheFactory::FileCacheData * FileCacheFactory::getImpl(const std::string & cache_base_path, std::lock_guard &) { - auto it = caches.find(cache_base_path); - if (it == caches.end()) + auto it = caches_by_path.find(cache_base_path); + if (it == caches_by_path.end()) return nullptr; return &it->second; } @@ -53,17 +52,37 @@ FileCachePtr FileCacheFactory::get(const std::string & cache_base_path) } FileCachePtr FileCacheFactory::getOrCreate( - const std::string & cache_base_path, const FileCacheSettings & file_cache_settings) + const std::string & cache_base_path, const FileCacheSettings & file_cache_settings, const std::string & name) { std::lock_guard lock(mutex); auto * cache_data = getImpl(cache_base_path, lock); if (cache_data) + { + registerCacheByName(name, *cache_data); return cache_data->cache; + } auto cache = std::make_shared(cache_base_path, file_cache_settings); - caches.emplace(cache_base_path, CacheData(cache, file_cache_settings)); + FileCacheData result{cache, file_cache_settings}; + + registerCacheByName(name, result); + caches_by_path.emplace(cache_base_path, result); + return cache; } +FileCacheFactory::FileCacheData FileCacheFactory::getByName(const std::string & name) +{ + auto it = caches_by_name.find(name); + if (it == caches_by_name.end()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "No cache found by name: {}", name); + return it->second; +} + +void FileCacheFactory::registerCacheByName(const std::string & name, const FileCacheData & cache_data) +{ + caches_by_name.emplace(std::make_pair(name, cache_data)); +} + } diff --git a/src/Common/FileCacheFactory.h b/src/Common/FileCacheFactory.h index 3518f487b6d..007d77643b6 100644 --- a/src/Common/FileCacheFactory.h +++ b/src/Common/FileCacheFactory.h @@ -15,20 +15,21 @@ namespace DB */ class FileCacheFactory final : private boost::noncopyable { - struct CacheData +public: + struct FileCacheData { FileCachePtr cache; FileCacheSettings settings; 
- CacheData(FileCachePtr cache_, const FileCacheSettings & settings_) : cache(cache_), settings(settings_) {} + FileCacheData(FileCachePtr cache_, const FileCacheSettings & settings_) : cache(cache_), settings(settings_) {} }; - using CacheByBasePath = std::unordered_map; + using CacheByBasePath = std::unordered_map; + using CacheByName = std::unordered_map; -public: static FileCacheFactory & instance(); - FileCachePtr getOrCreate(const std::string & cache_base_path, const FileCacheSettings & file_cache_settings); + FileCachePtr getOrCreate(const std::string & cache_base_path, const FileCacheSettings & file_cache_settings, const std::string & name); FileCachePtr get(const std::string & cache_base_path); @@ -36,11 +37,15 @@ public: const FileCacheSettings & getSettings(const std::string & cache_base_path); + FileCacheData getByName(const std::string & name); + private: - CacheData * getImpl(const std::string & cache_base_path, std::lock_guard &); + FileCacheData * getImpl(const std::string & cache_base_path, std::lock_guard &); + void registerCacheByName(const std::string & name, const FileCacheData & cache_data); std::mutex mutex; - CacheByBasePath caches; + CacheByBasePath caches_by_path; + CacheByName caches_by_name; }; } diff --git a/src/Disks/ObjectStorages/DiskObjectStorageCommon.cpp b/src/Disks/ObjectStorages/DiskObjectStorageCommon.cpp index 9311cb2c12a..99606a18517 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorageCommon.cpp +++ b/src/Disks/ObjectStorages/DiskObjectStorageCommon.cpp @@ -73,7 +73,7 @@ FileCachePtr getCachePtrForDisk( FileCacheSettings file_cache_settings; file_cache_settings.loadFromConfig(config, config_prefix); - auto cache = FileCacheFactory::instance().getOrCreate(cache_base_path, file_cache_settings); + auto cache = FileCacheFactory::instance().getOrCreate(cache_base_path, file_cache_settings, name); cache->initialize(); auto * log = &Poco::Logger::get("Disk(" + name + ")"); diff --git 
a/src/Interpreters/InterpreterDescribeCacheQuery.cpp b/src/Interpreters/InterpreterDescribeCacheQuery.cpp new file mode 100644 index 00000000000..554153922b5 --- /dev/null +++ b/src/Interpreters/InterpreterDescribeCacheQuery.cpp @@ -0,0 +1,58 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +static Block getSampleBlock() +{ + ColumnsWithTypeAndName columns{ + ColumnWithTypeAndName{std::make_shared(), "max_size"}, + ColumnWithTypeAndName{std::make_shared(), "max_elements"}, + ColumnWithTypeAndName{std::make_shared(), "max_file_segment_size"}, + ColumnWithTypeAndName{std::make_shared>(), "cache_on_write_operations"}, + ColumnWithTypeAndName{std::make_shared>(), "enable_cache_hits_threshold"}, + ColumnWithTypeAndName{std::make_shared(), "current_size"}, + ColumnWithTypeAndName{std::make_shared(), "current_elements"}, + ColumnWithTypeAndName{std::make_shared(), "path"} + }; + return Block(columns); +} + +BlockIO InterpreterDescribeCacheQuery::execute() +{ + ColumnsDescription columns; + + const auto & ast = query_ptr->as(); + Block sample_block = getSampleBlock(); + MutableColumns res_columns = sample_block.cloneEmptyColumns(); + + auto cache_data = FileCacheFactory::instance().getByName(ast.cache_name); + const auto & settings = cache_data.settings; + const auto & cache = cache_data.cache; + + res_columns[0]->insert(settings.max_size); + res_columns[1]->insert(settings.max_elements); + res_columns[2]->insert(settings.max_file_segment_size); + res_columns[3]->insert(settings.cache_on_write_operations); + res_columns[4]->insert(settings.enable_cache_hits_threshold); + res_columns[5]->insert(cache->getUsedCacheSize()); + res_columns[6]->insert(cache->getFileSegmentsNum()); + res_columns[7]->insert(cache->getBasePath()); + + BlockIO res; + size_t num_rows = res_columns[0]->size(); + auto source = std::make_shared(sample_block, Chunk(std::move(res_columns), num_rows)); + res.pipeline = 
QueryPipeline(std::move(source)); + + return res; +} + +} diff --git a/src/Interpreters/InterpreterDescribeCacheQuery.h b/src/Interpreters/InterpreterDescribeCacheQuery.h new file mode 100644 index 00000000000..31b77f7d064 --- /dev/null +++ b/src/Interpreters/InterpreterDescribeCacheQuery.h @@ -0,0 +1,22 @@ +#pragma once + +#include +#include + + +namespace DB +{ + +class InterpreterDescribeCacheQuery : public IInterpreter, WithContext +{ +public: + InterpreterDescribeCacheQuery(const ASTPtr & query_ptr_, ContextPtr context_) : WithContext(context_), query_ptr(query_ptr_) {} + + BlockIO execute() override; + +private: + ASTPtr query_ptr; +}; + + +} diff --git a/src/Interpreters/InterpreterFactory.cpp b/src/Interpreters/InterpreterFactory.cpp index 5dcee1eae05..c212eb50b97 100644 --- a/src/Interpreters/InterpreterFactory.cpp +++ b/src/Interpreters/InterpreterFactory.cpp @@ -35,6 +35,7 @@ #include #include #include +#include #include #include @@ -43,6 +44,7 @@ #include #include #include +#include #include #include #include @@ -200,6 +202,10 @@ std::unique_ptr InterpreterFactory::get(ASTPtr & query, ContextMut { return std::make_unique(query, context); } + else if (query->as()) + { + return std::make_unique(query, context); + } else if (query->as()) { return std::make_unique(query, context); diff --git a/src/Parsers/ASTDescribeCacheQuery.h b/src/Parsers/ASTDescribeCacheQuery.h new file mode 100644 index 00000000000..31e883ff500 --- /dev/null +++ b/src/Parsers/ASTDescribeCacheQuery.h @@ -0,0 +1,27 @@ +#include + +namespace DB +{ + +class ASTDescribeCacheQuery : public ASTQueryWithOutput +{ +public: + String cache_name; + + String getID(char) const override { return "DescribeCacheQuery"; } + + ASTPtr clone() const override + { + auto res = std::make_shared(*this); + cloneOutputOptions(*res); + return res; + } + +protected: + void formatQueryImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override + { + settings.ostr << (settings.hilite ? 
hilite_keyword : "") << "DESCRIBE CACHE" << (settings.hilite ? hilite_none : "") << " " << cache_name; + } +}; + +} diff --git a/src/Parsers/ParserDescribeCacheQuery.cpp b/src/Parsers/ParserDescribeCacheQuery.cpp new file mode 100644 index 00000000000..e64c1a850f2 --- /dev/null +++ b/src/Parsers/ParserDescribeCacheQuery.cpp @@ -0,0 +1,34 @@ +#include +#include +#include +#include + +namespace DB +{ + + +bool ParserDescribeCacheQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + ParserKeyword p_describe("DESCRIBE"); + ParserKeyword p_desc("DESC"); + ParserKeyword p_cache("CACHE"); + ParserLiteral p_cache_name; + + if ((!p_describe.ignore(pos, expected) && !p_desc.ignore(pos, expected)) + || !p_cache.ignore(pos, expected)) + return false; + + auto query = std::make_shared(); + + ASTPtr ast; + if (!p_cache_name.parse(pos, ast, expected)) + return false; + + query->cache_name = ast->as()->value.safeGet(); + node = query; + + return true; +} + + +} diff --git a/src/Parsers/ParserDescribeCacheQuery.h b/src/Parsers/ParserDescribeCacheQuery.h new file mode 100644 index 00000000000..ae717875678 --- /dev/null +++ b/src/Parsers/ParserDescribeCacheQuery.h @@ -0,0 +1,20 @@ +#pragma once + + +#include +#include + + +namespace DB +{ + +/** Query (DESCRIBE | DESC) CACHE 'cache_name' + */ +class ParserDescribeCacheQuery : public IParserBase +{ +protected: + const char * getName() const override { return "DESCRIBE CACHE query"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + +} diff --git a/src/Parsers/ParserQueryWithOutput.cpp b/src/Parsers/ParserQueryWithOutput.cpp index 6041f986a49..a6af19fc670 100644 --- a/src/Parsers/ParserQueryWithOutput.cpp +++ b/src/Parsers/ParserQueryWithOutput.cpp @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -35,6 +36,7 @@ bool ParserQueryWithOutput::parseImpl(Pos & pos, ASTPtr & node, Expected & expec ParserSelectWithUnionQuery select_p; ParserTablePropertiesQuery 
table_p; ParserDescribeTableQuery describe_table_p; + ParserDescribeCacheQuery describe_cache_p; ParserShowProcesslistQuery show_processlist_p; ParserCreateQuery create_p; ParserAlterQuery alter_p; @@ -59,6 +61,7 @@ bool ParserQueryWithOutput::parseImpl(Pos & pos, ASTPtr & node, Expected & expec || show_create_access_entity_p.parse(pos, query, expected) /// should be before `show_tables_p` || show_tables_p.parse(pos, query, expected) || table_p.parse(pos, query, expected) + || describe_cache_p.parse(pos, query, expected) || describe_table_p.parse(pos, query, expected) || show_processlist_p.parse(pos, query, expected) || create_p.parse(pos, query, expected) From 7fb5217384210a478fb46b329c4aefc93dfe83e7 Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Tue, 21 Jun 2022 16:18:18 +0200 Subject: [PATCH 014/101] Update ASTDescribeCacheQuery.h --- src/Parsers/ASTDescribeCacheQuery.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Parsers/ASTDescribeCacheQuery.h b/src/Parsers/ASTDescribeCacheQuery.h index 31e883ff500..17ec697d565 100644 --- a/src/Parsers/ASTDescribeCacheQuery.h +++ b/src/Parsers/ASTDescribeCacheQuery.h @@ -1,3 +1,4 @@ +#pragma once #include namespace DB From a7ff956c83fc9c955d8d3b17a18e1a270db3b8f4 Mon Sep 17 00:00:00 2001 From: zvonand Date: Wed, 22 Jun 2022 14:56:37 +0500 Subject: [PATCH 015/101] updated docs --- .../functions/string-functions.md | 42 ++++++++++++++++++ .../functions/encoding-functions.md | 44 ------------------- .../functions/string-functions.md | 42 ++++++++++++++++++ 3 files changed, 84 insertions(+), 44 deletions(-) diff --git a/docs/en/sql-reference/functions/string-functions.md b/docs/en/sql-reference/functions/string-functions.md index 66e9aa98e67..7c6ae903acf 100644 --- a/docs/en/sql-reference/functions/string-functions.md +++ b/docs/en/sql-reference/functions/string-functions.md @@ -494,6 +494,48 @@ If the ‘s’ string is non-empty and does not contain the ‘c’ character at 
Returns the string ‘s’ that was converted from the encoding in ‘from’ to the encoding in ‘to’. +## Base58Encode(plaintext[, alphabet_name]), Base58Decode(encoded_text[, alphabet_name]) + +Accepts a String and encodes/decodes it using [Base58](https://tools.ietf.org/id/draft-msporny-base58-01.html) encoding scheme using specified alphabet. + +**Syntax** + +```sql +base58Encode(decoded[, alphabet_name]) +base58Decode(encoded[, alphabet_name]) +``` + +**Arguments** + +- `decoded` — [String](../../sql-reference/data-types/string.md) column or constant. +- `encoded` — [String](../../sql-reference/data-types/string.md) column or constant. If the string is not a valid base58-encoded value, an exception is thrown. +- `alphabet_name` — String constant. Specifies alphabet used for encoding. Possible values: `gmp`, `bitcoin`, `ripple`, `flickr`. Default: `gmp`. + +**Returned value** + +- A string containing encoded/decoded value of 1st argument. + +Type: [String](../../sql-reference/data-types/string.md). 
+ +**Example** + +Query: + +``` sql +SELECT base58Encode('encode', 'flickr'); +SELECT base58Decode('izCFiDUY', 'ripple'); +``` + +Result: +```text +┌─base58Encode('encode', 'flickr')─┐ +│ SvyTHb1D │ +└──────────────────────────────────┘ +┌─base58Decode('izCFiDUY', 'ripple')─┐ +│ decode │ +└────────────────────────────────────┘ +``` + ## base64Encode(s) Encodes ‘s’ string into base64 diff --git a/docs/ru/sql-reference/functions/encoding-functions.md b/docs/ru/sql-reference/functions/encoding-functions.md index 255985fcc92..65d2b0e6538 100644 --- a/docs/ru/sql-reference/functions/encoding-functions.md +++ b/docs/ru/sql-reference/functions/encoding-functions.md @@ -404,47 +404,3 @@ SELECT bitPositionsToArray(toInt8(-1)) AS bit_positions; │ [0,1,2,3,4,5,6,7] │ └───────────────────┘ ``` - -## Base58Encode(plaintext[, alphabet_name]) -## Base58Decode(plaintext[, alphabet_name]) - -Принимает на вход строку или колонку строк и кодирует/раскодирует их с помощью схемы кодирования [Base58](https://tools.ietf.org/id/draft-msporny-base58-01.html) с использованием указанного алфавита. - -**Синтаксис** - -```sql -base58Encode(decoded[, alphabet_name]) -base58Decode(encoded[, alphabet_name]) -``` - -**Аргументы** - -- `decoded` — Колонка или строка типа [String](../../sql-reference/data-types/string.md). -- `encoded` — Колонка или строка типа [String](../../sql-reference/data-types/string.md). Если входная строка не является корректным кодом для какой-либо другой строки, возникнет исключение `1001`. -- `alphabet_name` — Строковая константа. Указывает алфавит, для которого необходимо получить код. Может принимать одно из следующих значений: `gmp`, `bitcoin`, `ripple`, `flickr`. По умолчанию: `gmp`. - -**Возвращаемое значение** - -- Строка, содержащая раскодированный/закодированный первый аргумент. - -Тип: [String](../../sql-reference/data-types/string.md). 
- -**Пример:** - -Запрос: - -``` sql -SELECT base58Encode('encode', 'flickr'); -SELECT base58Decode('izCFiDUY', 'ripple'); -``` - -Результат: -```text -┌─base58Encode('encode', 'flickr')─┐ -│ SvyTHb1D │ -└──────────────────────────────────┘ -┌─base58Decode('izCFiDUY', 'ripple')─┐ -│ decode │ -└────────────────────────────────────┘ -``` - diff --git a/docs/ru/sql-reference/functions/string-functions.md b/docs/ru/sql-reference/functions/string-functions.md index 6cd4cea403e..e85a97e0099 100644 --- a/docs/ru/sql-reference/functions/string-functions.md +++ b/docs/ru/sql-reference/functions/string-functions.md @@ -490,6 +490,48 @@ SELECT concat(key1, key2), sum(value) FROM key_val GROUP BY (key1, key2); Возвращает сконвертированную из кодировки from в кодировку to строку s. +## Base58Encode(plaintext[, alphabet_name]), Base58Decode(plaintext[, alphabet_name]) {#base58} + +Принимает на вход строку или колонку строк и кодирует/раскодирует их с помощью схемы кодирования [Base58](https://tools.ietf.org/id/draft-msporny-base58-01.html) с использованием указанного алфавита. + +**Синтаксис** + +```sql +base58Encode(decoded[, alphabet_name]) +base58Decode(encoded[, alphabet_name]) +``` + +**Аргументы** + +- `decoded` — Колонка или строка типа [String](../../sql-reference/data-types/string.md). +- `encoded` — Колонка или строка типа [String](../../sql-reference/data-types/string.md). Если входная строка не является корректным кодом для какой-либо другой строки, возникнет исключение `1001`. +- `alphabet_name` — Строковая константа. Указывает алфавит, для которого необходимо получить код. Может принимать одно из следующих значений: `gmp`, `bitcoin`, `ripple`, `flickr`. По умолчанию: `gmp`. + +**Возвращаемое значение** + +- Строка, содержащая раскодированный/закодированный первый аргумент. + +Тип: [String](../../sql-reference/data-types/string.md). 
+ +**Пример:** + +Запрос: + +``` sql +SELECT base58Encode('encode', 'flickr'); +SELECT base58Decode('izCFiDUY', 'ripple'); +``` + +Результат: +```text +┌─base58Encode('encode', 'flickr')─┐ +│ SvyTHb1D │ +└──────────────────────────────────┘ +┌─base58Decode('izCFiDUY', 'ripple')─┐ +│ decode │ +└────────────────────────────────────┘ +``` + ## base64Encode(s) {#base64encode} Производит кодирование строки s в base64-представление. From 7ae7b1e421d26ccdd491fb9b8379eb0df7610afe Mon Sep 17 00:00:00 2001 From: zvonand Date: Wed, 22 Jun 2022 14:58:39 +0500 Subject: [PATCH 016/101] fix wrong docs location --- .../functions/encoding-functions.md | 43 ------------------- 1 file changed, 43 deletions(-) diff --git a/docs/en/sql-reference/functions/encoding-functions.md b/docs/en/sql-reference/functions/encoding-functions.md index 42a6d75952c..4ee71267a09 100644 --- a/docs/en/sql-reference/functions/encoding-functions.md +++ b/docs/en/sql-reference/functions/encoding-functions.md @@ -440,46 +440,3 @@ Result: │ [0,1,2,3,4,5,6,7] │ └───────────────────┘ ``` - -## Base58Encode(plaintext[, alphabet_name]) -## Base58Decode(plaintext[, alphabet_name]) - -Accepts a String and encodes/decodes it using [Base58](https://tools.ietf.org/id/draft-msporny-base58-01.html) encoding scheme using specified alphabet. - -**Syntax** - -```sql -base58Encode(decoded[, alphabet_name]) -base58Decode(encoded[, alphabet_name]) -``` - -**Arguments** - -- `decoded` — [String](../../sql-reference/data-types/string.md) column or constant. -- `encoded` — [String](../../sql-reference/data-types/string.md) column or constant. If the string is not a valid base58-encoded value, `1001 Exception` will be thrown. -- `alphabet_name` — String constant. Specifies alphabet used for encoding. Possible values: `gmp`, `bitcoin`, `ripple`, `flickr`. Default: `gmp`. - -**Returned value** - -- A string containing encoded/decoded value of 1st argument. - -Type: [String](../../sql-reference/data-types/string.md). 
- -**Example** - -Query: - -``` sql -SELECT base58Encode('encode', 'flickr'); -SELECT base58Decode('izCFiDUY', 'ripple'); -``` - -Result: -```text -┌─base58Encode('encode', 'flickr')─┐ -│ SvyTHb1D │ -└──────────────────────────────────┘ -┌─base58Decode('izCFiDUY', 'ripple')─┐ -│ decode │ -└────────────────────────────────────┘ -``` From 64a2f3734b1a552147b342a31d848b278041d906 Mon Sep 17 00:00:00 2001 From: Alexander Gololobov <440544+davenger@users.noreply.github.com> Date: Fri, 6 May 2022 15:06:56 +0200 Subject: [PATCH 017/101] Protect ReadResult internals from MergeTreeRangeReader clients --- src/Storages/MergeTree/IMergeTreeReader.h | 2 -- .../MergeTree/MergeTreeRangeReader.cpp | 4 ++-- src/Storages/MergeTree/MergeTreeRangeReader.h | 24 ++++++++++++------- 3 files changed, 18 insertions(+), 12 deletions(-) diff --git a/src/Storages/MergeTree/IMergeTreeReader.h b/src/Storages/MergeTree/IMergeTreeReader.h index 7c5977b5cb2..b13db9c3255 100644 --- a/src/Storages/MergeTree/IMergeTreeReader.h +++ b/src/Storages/MergeTree/IMergeTreeReader.h @@ -89,8 +89,6 @@ protected: using ColumnPosition = std::optional; ColumnPosition findColumnForOffsets(const String & column_name) const; - friend class MergeTreeRangeReader::DelayedStream; - private: /// Alter conversions, which must be applied on fly if required MergeTreeData::AlterConversions alter_conversions; diff --git a/src/Storages/MergeTree/MergeTreeRangeReader.cpp b/src/Storages/MergeTree/MergeTreeRangeReader.cpp index d71dddc938c..c89ba8cf165 100644 --- a/src/Storages/MergeTree/MergeTreeRangeReader.cpp +++ b/src/Storages/MergeTree/MergeTreeRangeReader.cpp @@ -64,7 +64,7 @@ static void filterColumns(Columns & columns, const ColumnPtr & filter) } -static size_t getLastMark(const MergeTreeRangeReader::ReadResult::RangesInfo & ranges) +size_t MergeTreeRangeReader::ReadResult::getLastMark(const MergeTreeRangeReader::ReadResult::RangesInfo & ranges) { size_t current_task_last_mark = 0; for (const auto & mark_range : ranges) 
@@ -1001,7 +1001,7 @@ Columns MergeTreeRangeReader::continueReadingChain(ReadResult & result, size_t & const auto & rows_per_granule = result.rowsPerGranule(); const auto & started_ranges = result.startedRanges(); - size_t current_task_last_mark = getLastMark(started_ranges); + size_t current_task_last_mark = ReadResult::getLastMark(started_ranges); size_t next_range_to_start = 0; auto size = rows_per_granule.size(); diff --git a/src/Storages/MergeTree/MergeTreeRangeReader.h b/src/Storages/MergeTree/MergeTreeRangeReader.h index 21ed35e6a78..9d03b987419 100644 --- a/src/Storages/MergeTree/MergeTreeRangeReader.h +++ b/src/Storages/MergeTree/MergeTreeRangeReader.h @@ -57,6 +57,7 @@ public: bool isCurrentRangeFinished() const; bool isInitialized() const { return is_initialized; } +private: /// Accumulates sequential read() requests to perform a large read instead of multiple small reads class DelayedStream { @@ -144,10 +145,23 @@ public: size_t ceilRowsToCompleteGranules(size_t rows_num) const; }; +public: /// Statistics after next reading step. class ReadResult { public: + Columns columns; + size_t num_rows = 0; + + /// The number of rows were added to block as a result of reading chain. + size_t numReadRows() const { return num_read_rows; } + /// The number of bytes read from disk. + size_t numBytesRead() const { return num_bytes_read; } + + private: + /// Only MergeTreeRangeReader is supposed to access ReadResult internals. + friend class MergeTreeRangeReader; + using NumRows = std::vector; struct RangeInfo @@ -161,13 +175,11 @@ public: const RangesInfo & startedRanges() const { return started_ranges; } const NumRows & rowsPerGranule() const { return rows_per_granule; } + static size_t getLastMark(const MergeTreeRangeReader::ReadResult::RangesInfo & ranges); + /// The number of rows were read at LAST iteration in chain. <= num_added_rows + num_filtered_rows. 
size_t totalRowsPerGranule() const { return total_rows_per_granule; } - /// The number of rows were added to block as a result of reading chain. - size_t numReadRows() const { return num_read_rows; } size_t numRowsToSkipInLastGranule() const { return num_rows_to_skip_in_last_granule; } - /// The number of bytes read from disk. - size_t numBytesRead() const { return num_bytes_read; } /// Filter you need to apply to newly-read columns in order to add them to block. const ColumnUInt8 * getFilterOriginal() const { return filter_original ? filter_original : filter; } const ColumnUInt8 * getFilter() const { return filter; } @@ -195,13 +207,10 @@ public: size_t countBytesInResultFilter(const IColumn::Filter & filter); - Columns columns; - size_t num_rows = 0; bool need_filter = false; Block block_before_prewhere; - private: RangesInfo started_ranges; /// The number of rows read from each granule. /// Granule here is not number of rows between two marks @@ -234,7 +243,6 @@ public: const Block & getSampleBlock() const { return sample_block; } private: - ReadResult startReadingChain(size_t max_rows, MarkRanges & ranges); Columns continueReadingChain(ReadResult & result, size_t & num_rows); void executePrewhereActionsAndFilterColumns(ReadResult & result); From 538c8c96fc63cf9663a86cd1b6b66a9da76c3215 Mon Sep 17 00:00:00 2001 From: Alexander Gololobov <440544+davenger@users.noreply.github.com> Date: Mon, 9 May 2022 16:34:44 +0200 Subject: [PATCH 018/101] Dump row_level_filter as a part of prewhere_info --- src/Storages/IStorage.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/Storages/IStorage.cpp b/src/Storages/IStorage.cpp index cd6c49d6e3b..0fcf48b9afc 100644 --- a/src/Storages/IStorage.cpp +++ b/src/Storages/IStorage.cpp @@ -297,6 +297,11 @@ std::string PrewhereInfo::dump() const WriteBufferFromOwnString ss; ss << "PrewhereDagInfo\n"; + if (row_level_filter) + { + ss << "row_level_filter " << row_level_filter->dumpDAG() << "\n"; + } + if (prewhere_actions) { 
ss << "prewhere_actions " << prewhere_actions->dumpDAG() << "\n"; From 159ab765fb3ddead9457923ed136eb368e69a3d0 Mon Sep 17 00:00:00 2001 From: Alexander Gololobov <440544+davenger@users.noreply.github.com> Date: Fri, 13 May 2022 21:00:44 +0200 Subject: [PATCH 019/101] Moved estimateNumRows from lambda to function --- .../MergeTreeBaseSelectProcessor.cpp | 79 ++++++++++--------- 1 file changed, 42 insertions(+), 37 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp b/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp index ca5e7393666..fbe5daed196 100644 --- a/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp +++ b/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp @@ -228,6 +228,46 @@ void MergeTreeBaseSelectProcessor::initializeRangeReaders(MergeTreeReadTask & cu } } +static UInt64 estimateNumRows(const MergeTreeReadTask & current_task, UInt64 current_preferred_block_size_bytes, + UInt64 current_max_block_size_rows, UInt64 current_preferred_max_column_in_block_size_bytes, double min_filtration_ratio) + //, const MergeTreeRangeReader & current_reader) +{ + const MergeTreeRangeReader & current_reader = current_task.range_reader; + + if (!current_task.size_predictor) + return static_cast(current_max_block_size_rows); + + /// Calculates number of rows will be read using preferred_block_size_bytes. + /// Can't be less than avg_index_granularity. + size_t rows_to_read = current_task.size_predictor->estimateNumRows(current_preferred_block_size_bytes); + if (!rows_to_read) + return rows_to_read; + auto total_row_in_current_granule = current_reader.numRowsInCurrentGranule(); + rows_to_read = std::max(total_row_in_current_granule, rows_to_read); + + if (current_preferred_max_column_in_block_size_bytes) + { + /// Calculates number of rows will be read using preferred_max_column_in_block_size_bytes. 
+ auto rows_to_read_for_max_size_column + = current_task.size_predictor->estimateNumRowsForMaxSizeColumn(current_preferred_max_column_in_block_size_bytes); + double filtration_ratio = std::max(min_filtration_ratio, 1.0 - current_task.size_predictor->filtered_rows_ratio); + auto rows_to_read_for_max_size_column_with_filtration + = static_cast(rows_to_read_for_max_size_column / filtration_ratio); + + /// If preferred_max_column_in_block_size_bytes is used, number of rows to read can be less than current_index_granularity. + rows_to_read = std::min(rows_to_read, rows_to_read_for_max_size_column_with_filtration); + } + + auto unread_rows_in_current_granule = current_reader.numPendingRowsInCurrentGranule(); + if (unread_rows_in_current_granule >= rows_to_read) + return rows_to_read; + + const MergeTreeIndexGranularity & index_granularity = current_task.data_part->index_granularity; + + return index_granularity.countMarksForRows(current_reader.currentMark(), rows_to_read, current_reader.numReadRowsInCurrentGranule()); +} + + Chunk MergeTreeBaseSelectProcessor::readFromPartImpl() { @@ -237,45 +277,10 @@ Chunk MergeTreeBaseSelectProcessor::readFromPartImpl() const UInt64 current_max_block_size_rows = max_block_size_rows; const UInt64 current_preferred_block_size_bytes = preferred_block_size_bytes; const UInt64 current_preferred_max_column_in_block_size_bytes = preferred_max_column_in_block_size_bytes; - const MergeTreeIndexGranularity & index_granularity = task->data_part->index_granularity; const double min_filtration_ratio = 0.00001; - auto estimate_num_rows = [current_preferred_block_size_bytes, current_max_block_size_rows, - &index_granularity, current_preferred_max_column_in_block_size_bytes, min_filtration_ratio]( - MergeTreeReadTask & current_task, MergeTreeRangeReader & current_reader) - { - if (!current_task.size_predictor) - return static_cast(current_max_block_size_rows); - - /// Calculates number of rows will be read using preferred_block_size_bytes. 
- /// Can't be less than avg_index_granularity. - size_t rows_to_read = current_task.size_predictor->estimateNumRows(current_preferred_block_size_bytes); - if (!rows_to_read) - return rows_to_read; - auto total_row_in_current_granule = current_reader.numRowsInCurrentGranule(); - rows_to_read = std::max(total_row_in_current_granule, rows_to_read); - - if (current_preferred_max_column_in_block_size_bytes) - { - /// Calculates number of rows will be read using preferred_max_column_in_block_size_bytes. - auto rows_to_read_for_max_size_column - = current_task.size_predictor->estimateNumRowsForMaxSizeColumn(current_preferred_max_column_in_block_size_bytes); - double filtration_ratio = std::max(min_filtration_ratio, 1.0 - current_task.size_predictor->filtered_rows_ratio); - auto rows_to_read_for_max_size_column_with_filtration - = static_cast(rows_to_read_for_max_size_column / filtration_ratio); - - /// If preferred_max_column_in_block_size_bytes is used, number of rows to read can be less than current_index_granularity. 
- rows_to_read = std::min(rows_to_read, rows_to_read_for_max_size_column_with_filtration); - } - - auto unread_rows_in_current_granule = current_reader.numPendingRowsInCurrentGranule(); - if (unread_rows_in_current_granule >= rows_to_read) - return rows_to_read; - - return index_granularity.countMarksForRows(current_reader.currentMark(), rows_to_read, current_reader.numReadRowsInCurrentGranule()); - }; - - UInt64 recommended_rows = estimate_num_rows(*task, task->range_reader); + UInt64 recommended_rows = estimateNumRows(*task, current_preferred_block_size_bytes, + current_max_block_size_rows, current_preferred_max_column_in_block_size_bytes, min_filtration_ratio);//, task->range_reader); UInt64 rows_to_read = std::max(static_cast(1), std::min(current_max_block_size_rows, recommended_rows)); auto read_result = task->range_reader.read(rows_to_read, task->mark_ranges); From 87b669f43968499e2c4270fe7d9cefb68f839b37 Mon Sep 17 00:00:00 2001 From: Alexander Gololobov <440544+davenger@users.noreply.github.com> Date: Wed, 18 May 2022 20:22:42 +0200 Subject: [PATCH 020/101] Intermediate changes --- src/Storages/MergeTree/MergeTreeRangeReader.cpp | 8 ++++++++ src/Storages/MergeTree/MergeTreeRangeReader.h | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/MergeTreeRangeReader.cpp b/src/Storages/MergeTree/MergeTreeRangeReader.cpp index c89ba8cf165..779ac3a5ce4 100644 --- a/src/Storages/MergeTree/MergeTreeRangeReader.cpp +++ b/src/Storages/MergeTree/MergeTreeRangeReader.cpp @@ -358,6 +358,7 @@ void MergeTreeRangeReader::ReadResult::setFilterConstFalse() num_rows = 0; } +/// void MergeTreeRangeReader::ReadResult::optimize(bool can_read_incomplete_granules, bool allow_filter_columns) { if (total_rows_per_granule == 0 || filter == nullptr) @@ -415,6 +416,7 @@ void MergeTreeRangeReader::ReadResult::optimize(bool can_read_incomplete_granule need_filter = true; } +/// For each read granule size_t 
MergeTreeRangeReader::ReadResult::countZeroTails(const IColumn::Filter & filter_vec, NumRows & zero_tails, bool can_read_incomplete_granules) const { zero_tails.resize(0); @@ -594,6 +596,7 @@ size_t MergeTreeRangeReader::ReadResult::numZerosInTail(const UInt8 * begin, con return count; } +/// Filter size must match total_rows_per_granule void MergeTreeRangeReader::ReadResult::setFilter(const ColumnPtr & new_filter) { if (!new_filter && filter) @@ -1023,6 +1026,9 @@ Columns MergeTreeRangeReader::continueReadingChain(ReadResult & result, size_t & stream.skip(result.numRowsToSkipInLastGranule()); num_rows += stream.finalize(columns); + + // TODO: here we can verify that stream and prev_reader->stream are at exactly same offset + /// added_rows may be zero if all columns were read in prewhere and it's ok. if (num_rows && num_rows != result.totalRowsPerGranule()) throw Exception("RangeReader read " + toString(num_rows) + " rows, but " @@ -1039,6 +1045,8 @@ static void checkCombinedFiltersSize(size_t bytes_in_first_filter, size_t second "does not match second filter size ({})", bytes_in_first_filter, second_filter_size); } +/// Second filter size must be equal to number of 1s in the first filter. +/// The result size is equal to first filter size. static ColumnPtr combineFilters(ColumnPtr first, ColumnPtr second) { ConstantFilterDescription first_const_descr(*first); diff --git a/src/Storages/MergeTree/MergeTreeRangeReader.h b/src/Storages/MergeTree/MergeTreeRangeReader.h index 9d03b987419..5dc52a7c0b8 100644 --- a/src/Storages/MergeTree/MergeTreeRangeReader.h +++ b/src/Storages/MergeTree/MergeTreeRangeReader.h @@ -207,7 +207,7 @@ public: size_t countBytesInResultFilter(const IColumn::Filter & filter); - bool need_filter = false; + bool need_filter = false; /// TODO: what exactly does it mean? 
Block block_before_prewhere; From 4360fd9798338f655e7a77eb49d671421bc2bd8c Mon Sep 17 00:00:00 2001 From: Alexander Gololobov <440544+davenger@users.noreply.github.com> Date: Thu, 2 Jun 2022 19:58:25 +0200 Subject: [PATCH 021/101] Vector of structs instead of multiple vectors --- .../MergeTreeBaseSelectProcessor.cpp | 1 - src/Storages/MergeTree/MergeTreeReadPool.cpp | 21 ++++++++++--------- src/Storages/MergeTree/MergeTreeReadPool.h | 15 ++++++++----- 3 files changed, 21 insertions(+), 16 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp b/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp index fbe5daed196..085b16ff4d2 100644 --- a/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp +++ b/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp @@ -268,7 +268,6 @@ static UInt64 estimateNumRows(const MergeTreeReadTask & current_task, UInt64 cur } - Chunk MergeTreeBaseSelectProcessor::readFromPartImpl() { if (task->size_predictor) diff --git a/src/Storages/MergeTree/MergeTreeReadPool.cpp b/src/Storages/MergeTree/MergeTreeReadPool.cpp index d9a1f742609..89e021497ac 100644 --- a/src/Storages/MergeTree/MergeTreeReadPool.cpp +++ b/src/Storages/MergeTree/MergeTreeReadPool.cpp @@ -135,13 +135,15 @@ MergeTreeReadTaskPtr MergeTreeReadPool::getTask(size_t min_marks_to_read, size_t } } - auto curr_task_size_predictor = !per_part_size_predictor[part_idx] ? nullptr - : std::make_unique(*per_part_size_predictor[part_idx]); /// make a copy + const auto & per_part = per_part_params[part_idx]; + + auto curr_task_size_predictor = !per_part.size_predictor ? 
nullptr + : std::make_unique(*per_part.size_predictor); /// make a copy return std::make_unique( part.data_part, ranges_to_get_from_part, part.part_index_in_query, ordered_names, - per_part_column_name_set[part_idx], per_part_columns[part_idx], per_part_pre_columns[part_idx], - prewhere_info && prewhere_info->remove_prewhere_column, per_part_should_reorder[part_idx], std::move(curr_task_size_predictor)); + per_part.column_name_set, per_part.task_columns.columns, per_part.task_columns.pre_columns, + prewhere_info && prewhere_info->remove_prewhere_column, per_part.task_columns.should_reorder, std::move(curr_task_size_predictor)); } Block MergeTreeReadPool::getHeader() const @@ -216,15 +218,14 @@ std::vector MergeTreeReadPool::fillPerPartInfo(const RangesInDataParts & auto size_predictor = !predict_block_size_bytes ? nullptr : MergeTreeBaseSelectProcessor::getSizePredictor(part.data_part, task_columns, sample_block); - per_part_size_predictor.emplace_back(std::move(size_predictor)); + auto & per_part = per_part_params.emplace_back(); + + per_part.size_predictor = std::move(size_predictor); /// will be used to distinguish between PREWHERE and WHERE columns when applying filter const auto & required_column_names = task_columns.columns.getNames(); - per_part_column_name_set.emplace_back(required_column_names.begin(), required_column_names.end()); - - per_part_pre_columns.push_back(std::move(task_columns.pre_columns)); - per_part_columns.push_back(std::move(task_columns.columns)); - per_part_should_reorder.push_back(task_columns.should_reorder); + per_part.column_name_set = {required_column_names.begin(), required_column_names.end()}; + per_part.task_columns = std::move(task_columns); parts_with_idx.push_back({ part.data_part, part.part_index_in_query }); } diff --git a/src/Storages/MergeTree/MergeTreeReadPool.h b/src/Storages/MergeTree/MergeTreeReadPool.h index b9db1254e08..d882c0d761f 100644 --- a/src/Storages/MergeTree/MergeTreeReadPool.h +++ 
b/src/Storages/MergeTree/MergeTreeReadPool.h @@ -99,11 +99,16 @@ private: const Names column_names; bool do_not_steal_tasks; bool predict_block_size_bytes; - std::vector per_part_column_name_set; - std::vector per_part_columns; - std::vector per_part_pre_columns; - std::vector per_part_should_reorder; - std::vector per_part_size_predictor; + + struct PerPartParams + { + MergeTreeReadTaskColumns task_columns; + NameSet column_name_set; + MergeTreeBlockSizePredictorPtr size_predictor; + }; + + std::vector per_part_params; + PrewhereInfoPtr prewhere_info; struct Part From 6a26325fabed3e5e07c6038320bc4a5ea52c7336 Mon Sep 17 00:00:00 2001 From: Alexander Gololobov <440544+davenger@users.noreply.github.com> Date: Tue, 7 Jun 2022 09:03:11 +0200 Subject: [PATCH 022/101] Test dirty hacks for multiple PREWHERE steps --- src/Interpreters/InterpreterSelectQuery.cpp | 2 +- .../MergeTreeBaseSelectProcessor.cpp | 74 +++++++++++-- .../MergeTree/MergeTreeBaseSelectProcessor.h | 2 +- .../MergeTree/MergeTreeBlockReadUtils.cpp | 56 ++++++---- .../MergeTree/MergeTreeBlockReadUtils.h | 41 +++---- .../MergeTreeInOrderSelectProcessor.cpp | 6 +- .../MergeTree/MergeTreeRangeReader.cpp | 101 ++++++++++++------ src/Storages/MergeTree/MergeTreeRangeReader.h | 36 ++++++- src/Storages/MergeTree/MergeTreeReadPool.cpp | 4 +- .../MergeTreeReverseSelectProcessor.cpp | 4 +- .../MergeTree/MergeTreeSelectProcessor.cpp | 12 ++- .../MergeTreeThreadSelectProcessor.cpp | 28 +++-- src/Storages/SelectQueryInfo.h | 37 +++++++ 13 files changed, 301 insertions(+), 102 deletions(-) diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 1bf188596d7..98a06af88c9 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -1176,7 +1176,7 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional

row_level_filter, expressions.prewhere_info->row_level_column_name, - false); + true);//false); row_level_filter_step->setStepDescription("Row-level security filter (PREWHERE)"); query_plan.addStep(std::move(row_level_filter_step)); diff --git a/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp b/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp index 085b16ff4d2..bdfda6f3d5f 100644 --- a/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp +++ b/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp @@ -74,14 +74,36 @@ MergeTreeBaseSelectProcessor::MergeTreeBaseSelectProcessor( prewhere_actions = std::make_unique(); if (prewhere_info->row_level_filter) - prewhere_actions->row_level_filter = std::make_shared(prewhere_info->row_level_filter, actions_settings); + { + PrewhereExprStep row_level_filter_step + { + .actions = std::make_shared(prewhere_info->row_level_filter, actions_settings), + .column_name = prewhere_info->row_level_column_name, + .remove_column = true, + .need_filter = true //prewhere_info->need_filter // ???? 
+ }; - prewhere_actions->prewhere_actions = std::make_shared(prewhere_info->prewhere_actions, actions_settings); + prewhere_actions->steps.emplace_back(std::move(row_level_filter_step)); + +// prewhere_actions->row_level_filter = std::make_shared(prewhere_info->row_level_filter, actions_settings); + } +// prewhere_actions->row_level_column_name = prewhere_info->row_level_column_name; + +// prewhere_actions->prewhere_actions = std::make_shared(prewhere_info->prewhere_actions, actions_settings); +// prewhere_actions->prewhere_column_name = prewhere_info->prewhere_column_name; +// prewhere_actions->remove_prewhere_column = prewhere_info->remove_prewhere_column; +// prewhere_actions->need_filter = prewhere_info->need_filter; + + PrewhereExprStep prewhere_step + { + .actions = std::make_shared(prewhere_info->prewhere_actions, actions_settings), + .column_name = prewhere_info->prewhere_column_name, + .remove_column = prewhere_info->remove_prewhere_column, + .need_filter = prewhere_info->need_filter + }; + + prewhere_actions->steps.emplace_back(std::move(prewhere_step)); - prewhere_actions->row_level_column_name = prewhere_info->row_level_column_name; - prewhere_actions->prewhere_column_name = prewhere_info->prewhere_column_name; - prewhere_actions->remove_prewhere_column = prewhere_info->remove_prewhere_column; - prewhere_actions->need_filter = prewhere_info->need_filter; } } @@ -204,6 +226,38 @@ Chunk MergeTreeBaseSelectProcessor::generate() void MergeTreeBaseSelectProcessor::initializeRangeReaders(MergeTreeReadTask & current_task) { + MergeTreeRangeReader* prev_reader = nullptr; + bool last_reader = false; + + if (prewhere_info) + { + assert(prewhere_actions->steps.size() == pre_reader_for_step.size()); + + for (size_t i = 0; i < prewhere_actions->steps.size(); ++i) + { + last_reader = reader->getColumns().empty() && (i + 1 == prewhere_actions->steps.size()); + current_task.pre_range_reader.push_back( + MergeTreeRangeReader(pre_reader_for_step[i].get(), prev_reader, 
&prewhere_actions->steps[i], last_reader, non_const_virtual_column_names)); + + + prev_reader = ¤t_task.pre_range_reader.back(); + } + + } + + if (!last_reader) + { + current_task.range_reader = MergeTreeRangeReader(reader.get(), prev_reader, nullptr, true, non_const_virtual_column_names); + } + else + { + // HACK!! + // If all columns are read by pre_range_readers than move last pre_range_reader into range_reader + current_task.range_reader = std::move(current_task.pre_range_reader.back()); + current_task.pre_range_reader.pop_back(); + } + +/* if (prewhere_info) { if (reader->getColumns().empty()) @@ -226,6 +280,7 @@ void MergeTreeBaseSelectProcessor::initializeRangeReaders(MergeTreeReadTask & cu { current_task.range_reader = MergeTreeRangeReader(reader.get(), nullptr, nullptr, true, non_const_virtual_column_names); } +//*/ } static UInt64 estimateNumRows(const MergeTreeReadTask & current_task, UInt64 current_preferred_block_size_bytes, @@ -606,9 +661,12 @@ std::unique_ptr MergeTreeBaseSelectProcessor::getSi const Block & sample_block) { const auto & required_column_names = task_columns.columns.getNames(); - const auto & required_pre_column_names = task_columns.pre_columns.getNames(); NameSet complete_column_names(required_column_names.begin(), required_column_names.end()); - complete_column_names.insert(required_pre_column_names.begin(), required_pre_column_names.end()); + for (const auto & pre_columns_per_step : task_columns.pre_columns) + { + const auto & required_pre_column_names = pre_columns_per_step.getNames(); + complete_column_names.insert(required_pre_column_names.begin(), required_pre_column_names.end()); + } return std::make_unique( data_part, Names(complete_column_names.begin(), complete_column_names.end()), sample_block); diff --git a/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.h b/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.h index ac01221ff26..368dce7deaa 100644 --- a/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.h +++ 
b/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.h @@ -115,7 +115,7 @@ protected: using MergeTreeReaderPtr = std::unique_ptr; MergeTreeReaderPtr reader; - MergeTreeReaderPtr pre_reader; + std::vector pre_reader_for_step; MergeTreeReadTaskPtr task; diff --git a/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp b/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp index f74823eaec2..fd0f9c20d3f 100644 --- a/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp +++ b/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp @@ -131,12 +131,12 @@ NameSet injectRequiredColumns( MergeTreeReadTask::MergeTreeReadTask( const MergeTreeData::DataPartPtr & data_part_, const MarkRanges & mark_ranges_, size_t part_index_in_query_, - const Names & ordered_names_, const NameSet & column_name_set_, const NamesAndTypesList & columns_, - const NamesAndTypesList & pre_columns_, bool remove_prewhere_column_, bool should_reorder_, + const Names & ordered_names_, const NameSet & column_name_set_, const MergeTreeReadTaskColumns & task_columns_, + bool remove_prewhere_column_, MergeTreeBlockSizePredictorPtr && size_predictor_) : data_part{data_part_}, mark_ranges{mark_ranges_}, part_index_in_query{part_index_in_query_}, - ordered_names{ordered_names_}, column_name_set{column_name_set_}, columns{columns_}, pre_columns{pre_columns_}, - remove_prewhere_column{remove_prewhere_column_}, should_reorder{should_reorder_}, size_predictor{std::move(size_predictor_)} + ordered_names{ordered_names_}, column_name_set{column_name_set_}, task_columns{task_columns_}, + remove_prewhere_column{remove_prewhere_column_}, size_predictor{std::move(size_predictor_)} { } @@ -279,19 +279,40 @@ MergeTreeReadTaskColumns getReadTaskColumns( bool should_reorder = !injectRequiredColumns( storage, storage_snapshot, data_part, with_subcolumns, column_names).empty(); + MergeTreeReadTaskColumns result; + auto options = GetColumnsOptions(GetColumnsOptions::All).withExtendedObjects(); + if (with_subcolumns) + 
options.withSubcolumns(); + +// NameSet all_pre_columns; + if (prewhere_info) { - pre_column_names = prewhere_info->prewhere_actions->getRequiredColumnsNames(); + NameSet pre_name_set; +// TODO: for each prewhere step + + /// 1. Columns for row level filter if (prewhere_info->row_level_filter) { - NameSet names(pre_column_names.begin(), pre_column_names.end()); + pre_column_names = prewhere_info->row_level_filter->getRequiredColumnsNames(); - for (auto & name : prewhere_info->row_level_filter->getRequiredColumnsNames()) - { - if (!names.contains(name)) - pre_column_names.push_back(name); - } +////// HACK!!! + result.pre_columns.push_back(storage_snapshot->getColumnsByNames(options, pre_column_names)); +////////////// + + pre_name_set.insert(pre_column_names.begin(), pre_column_names.end()); + +// all_pre_columns.insert(pre_column_names.begin(), pre_column_names.end()); + } + + /// 2. Columns for prewhere + pre_column_names.clear(); + for (const auto & name : prewhere_info->prewhere_actions->getRequiredColumnsNames()) + { + if (pre_name_set.contains(name)) + continue; + pre_column_names.push_back(name); } if (pre_column_names.empty()) @@ -303,7 +324,6 @@ MergeTreeReadTaskColumns getReadTaskColumns( if (!injected_pre_columns.empty()) should_reorder = true; - const NameSet pre_name_set(pre_column_names.begin(), pre_column_names.end()); Names post_column_names; for (const auto & name : column_names) @@ -313,14 +333,14 @@ MergeTreeReadTaskColumns getReadTaskColumns( column_names = post_column_names; } - MergeTreeReadTaskColumns result; - NamesAndTypesList all_columns; +// NamesAndTypesList all_columns; - auto options = GetColumnsOptions(GetColumnsOptions::All).withExtendedObjects(); - if (with_subcolumns) - options.withSubcolumns(); - result.pre_columns = storage_snapshot->getColumnsByNames(options, pre_column_names); +////// HACK!!! + result.pre_columns.push_back(storage_snapshot->getColumnsByNames(options, pre_column_names)); +////////////// + + /// 3. 
Rest of the requested columns result.columns = storage_snapshot->getColumnsByNames(options, column_names); result.should_reorder = should_reorder; return result; diff --git a/src/Storages/MergeTree/MergeTreeBlockReadUtils.h b/src/Storages/MergeTree/MergeTreeBlockReadUtils.h index 1c347970558..eea8c227e14 100644 --- a/src/Storages/MergeTree/MergeTreeBlockReadUtils.h +++ b/src/Storages/MergeTree/MergeTreeBlockReadUtils.h @@ -30,6 +30,16 @@ NameSet injectRequiredColumns( Names & columns); +struct MergeTreeReadTaskColumns +{ + /// column names to read during WHERE + NamesAndTypesList columns; + /// column names to read during each PREWHERE step + std::vector pre_columns; + /// resulting block may require reordering in accordance with `ordered_names` + bool should_reorder = false; +}; + /// A batch of work for MergeTreeThreadSelectProcessor struct MergeTreeReadTask { @@ -43,39 +53,32 @@ struct MergeTreeReadTask const Names & ordered_names; /// used to determine whether column should be filtered during PREWHERE or WHERE const NameSet & column_name_set; - /// column names to read during WHERE - const NamesAndTypesList & columns; - /// column names to read during PREWHERE - const NamesAndTypesList & pre_columns; + + const MergeTreeReadTaskColumns& task_columns; + +// /// column names to read during WHERE +// const NamesAndTypesList & columns; +// /// column names to read during PREWHERE +// const NamesAndTypesList & pre_columns; /// should PREWHERE column be returned to requesting side? 
const bool remove_prewhere_column; - /// resulting block may require reordering in accordance with `ordered_names` - const bool should_reorder; +// /// resulting block may require reordering in accordance with `ordered_names` +// const bool should_reorder; /// Used to satistfy preferred_block_size_bytes limitation MergeTreeBlockSizePredictorPtr size_predictor; /// Used to save current range processing status MergeTreeRangeReader range_reader; - MergeTreeRangeReader pre_range_reader; + std::deque pre_range_reader; bool isFinished() const { return mark_ranges.empty() && range_reader.isCurrentRangeFinished(); } MergeTreeReadTask( const MergeTreeData::DataPartPtr & data_part_, const MarkRanges & mark_ranges_, size_t part_index_in_query_, - const Names & ordered_names_, const NameSet & column_name_set_, const NamesAndTypesList & columns_, - const NamesAndTypesList & pre_columns_, bool remove_prewhere_column_, bool should_reorder_, + const Names & ordered_names_, const NameSet & column_name_set_, const MergeTreeReadTaskColumns & task_columns_, + bool remove_prewhere_column_, MergeTreeBlockSizePredictorPtr && size_predictor_); }; -struct MergeTreeReadTaskColumns -{ - /// column names to read during WHERE - NamesAndTypesList columns; - /// column names to read during PREWHERE - NamesAndTypesList pre_columns; - /// resulting block may require reordering in accordance with `ordered_names` - bool should_reorder = false; -}; - MergeTreeReadTaskColumns getReadTaskColumns( const MergeTreeData & storage, const StorageSnapshotPtr & storage_snapshot, diff --git a/src/Storages/MergeTree/MergeTreeInOrderSelectProcessor.cpp b/src/Storages/MergeTree/MergeTreeInOrderSelectProcessor.cpp index 961106af51b..280ce82cfce 100644 --- a/src/Storages/MergeTree/MergeTreeInOrderSelectProcessor.cpp +++ b/src/Storages/MergeTree/MergeTreeInOrderSelectProcessor.cpp @@ -34,9 +34,9 @@ try : getSizePredictor(data_part, task_columns, sample_block); task = std::make_unique( - data_part, 
mark_ranges_for_task, part_index_in_query, ordered_names, column_name_set, task_columns.columns, - task_columns.pre_columns, prewhere_info && prewhere_info->remove_prewhere_column, - task_columns.should_reorder, std::move(size_predictor)); + data_part, mark_ranges_for_task, part_index_in_query, ordered_names, column_name_set, task_columns, + prewhere_info && prewhere_info->remove_prewhere_column, + std::move(size_predictor)); return true; } diff --git a/src/Storages/MergeTree/MergeTreeRangeReader.cpp b/src/Storages/MergeTree/MergeTreeRangeReader.cpp index 779ac3a5ce4..a51afed86cf 100644 --- a/src/Storages/MergeTree/MergeTreeRangeReader.cpp +++ b/src/Storages/MergeTree/MergeTreeRangeReader.cpp @@ -43,7 +43,7 @@ static void filterColumns(Columns & columns, const IColumn::Filter & filter) } } } - +/* static void filterColumns(Columns & columns, const ColumnPtr & filter) { ConstantFilterDescription const_descr(*filter); @@ -62,7 +62,7 @@ static void filterColumns(Columns & columns, const ColumnPtr & filter) FilterDescription descr(*filter); filterColumns(columns, *descr.data); } - +*/ size_t MergeTreeRangeReader::ReadResult::getLastMark(const MergeTreeRangeReader::ReadResult::RangesInfo & ranges) { @@ -647,7 +647,7 @@ size_t MergeTreeRangeReader::ReadResult::countBytesInResultFilter(const IColumn: MergeTreeRangeReader::MergeTreeRangeReader( IMergeTreeReader * merge_tree_reader_, MergeTreeRangeReader * prev_reader_, - const PrewhereExprInfo * prewhere_info_, + const PrewhereExprStep * prewhere_info_, bool last_reader_in_chain_, const Names & non_const_virtual_column_names_) : merge_tree_reader(merge_tree_reader_) @@ -675,7 +675,17 @@ MergeTreeRangeReader::MergeTreeRangeReader( if (prewhere_info) { - if (prewhere_info->row_level_filter) +// for (const auto & step : prewhere_info->steps) + const auto & step = *prewhere_info; + { + if (step.actions) + step.actions->execute(sample_block, true); + + if (step.remove_column) + sample_block.erase(step.column_name); + } + +/* if 
(prewhere_info->row_level_filter) { prewhere_info->row_level_filter->execute(sample_block, true); sample_block.erase(prewhere_info->row_level_column_name); @@ -686,6 +696,7 @@ MergeTreeRangeReader::MergeTreeRangeReader( if (prewhere_info->remove_prewhere_column) sample_block.erase(prewhere_info->prewhere_column_name); +*/ } } @@ -1107,13 +1118,15 @@ void MergeTreeRangeReader::executePrewhereActionsAndFilterColumns(ReadResult & r const auto & header = merge_tree_reader->getColumns(); size_t num_columns = header.size(); - if (result.columns.size() != (num_columns + non_const_virtual_column_names.size())) + // TODO: properly check that we have columns from previous steps and newly read required columns + if (result.columns.size() < num_columns + non_const_virtual_column_names.size()) throw Exception("Invalid number of columns passed to MergeTreeRangeReader. " "Expected " + toString(num_columns) + ", " "got " + toString(result.columns.size()), ErrorCodes::LOGICAL_ERROR); + ColumnPtr current_filter; ColumnPtr filter; - ColumnPtr row_level_filter; +// ColumnPtr row_level_filter; size_t prewhere_column_pos; { @@ -1130,8 +1143,12 @@ void MergeTreeRangeReader::executePrewhereActionsAndFilterColumns(ReadResult & r } } - for (auto name_and_type = header.begin(); pos < num_columns; ++pos, ++name_and_type) - block.insert({result.columns[pos], name_and_type->type, name_and_type->name}); +// for (auto name_and_type = header.begin(); pos < num_columns; ++pos, ++name_and_type) +// block.insert({result.columns[pos], name_and_type->type, name_and_type->name}); + for (const auto & name_and_type : header) { + block.insert({result.columns[pos], name_and_type.type, name_and_type.name}); + ++pos; + } for (const auto & column_name : non_const_virtual_column_names) { @@ -1145,7 +1162,7 @@ void MergeTreeRangeReader::executePrewhereActionsAndFilterColumns(ReadResult & r /// Columns might be projected out. We need to store them here so that default columns can be evaluated later. 
result.block_before_prewhere = block; - if (prewhere_info->row_level_filter) +/* if (prewhere_info->row_level_filter) { prewhere_info->row_level_filter->execute(block); auto row_level_filter_pos = block.getPositionByName(prewhere_info->row_level_column_name); @@ -1159,44 +1176,50 @@ void MergeTreeRangeReader::executePrewhereActionsAndFilterColumns(ReadResult & r else block.setColumns(columns); } +*/ + prewhere_info->actions->execute(block); - prewhere_info->prewhere_actions->execute(block); - - prewhere_column_pos = block.getPositionByName(prewhere_info->prewhere_column_name); + prewhere_column_pos = block.getPositionByName(prewhere_info->column_name); result.columns.clear(); result.columns.reserve(block.columns()); for (auto & col : block) result.columns.emplace_back(std::move(col.column)); - filter.swap(result.columns[prewhere_column_pos]); + current_filter.swap(result.columns[prewhere_column_pos]); + filter = current_filter; + + } if (result.getFilter()) { - /// TODO: implement for prewhere chain. - /// In order to do it we need combine filter and result.filter, where filter filters only '1' in result.filter. - throw Exception("MergeTreeRangeReader chain with several prewhere actions in not implemented.", - ErrorCodes::LOGICAL_ERROR); + ColumnPtr prev_filter = result.getFilterHolder(); + filter = combineFilters(prev_filter, std::move(filter)); + +// /// TODO: implement for prewhere chain. +// /// In order to do it we need combine filter and result.filter, where filter filters only '1' in result.filter. 
+// throw Exception("MergeTreeRangeReader chain with several prewhere actions in not implemented.", +// ErrorCodes::LOGICAL_ERROR); } - if (filter && row_level_filter) - { - row_level_filter = combineFilters(std::move(row_level_filter), filter); - result.setFilter(row_level_filter); - } - else +// if (filter && row_level_filter) +// { +// row_level_filter = combineFilters(std::move(row_level_filter), filter); +// result.setFilter(row_level_filter); +// } +// else result.setFilter(filter); /// If there is a WHERE, we filter in there, and only optimize IO and shrink columns here if (!last_reader_in_chain) - result.optimize(merge_tree_reader->canReadIncompleteGranules(), prewhere_info->row_level_filter == nullptr); + result.optimize(merge_tree_reader->canReadIncompleteGranules(), true); // TODO: prewhere_info->row_level_filter == nullptr); /// If we read nothing or filter gets optimized to nothing if (result.totalRowsPerGranule() == 0) result.setFilterConstFalse(); /// If we need to filter in PREWHERE - else if (prewhere_info->need_filter || result.need_filter || prewhere_info->row_level_filter) + else if (prewhere_info->need_filter || result.need_filter)// || prewhere_info->row_level_filter) { /// If there is a filter and without optimized if (result.getFilter() && last_reader_in_chain) @@ -1216,10 +1239,22 @@ void MergeTreeRangeReader::executePrewhereActionsAndFilterColumns(ReadResult & r /// filter might be shrunk while columns not const auto * result_filter = result.getFilterOriginal(); - if (row_level_filter) - filterColumns(result.columns, filter); - else - filterColumns(result.columns, result_filter->getData()); +// if (row_level_filter) +// filterColumns(result.columns, filter); +// else + // filterColumns(result.columns, result_filter->getData()); + + + { + FilterDescription current_filter_descr(*current_filter); + // TODO: need to filter by current filter column that has num_rows size, not the original size + + // TODO: properly handle const true and const 
false cases + if (current_filter_descr.countBytesInFilter() == 0) + result.columns.clear(); + else if (current_filter_descr.data) + filterColumns(result.columns, *current_filter_descr.data); + } result.need_filter = true; @@ -1242,22 +1277,22 @@ void MergeTreeRangeReader::executePrewhereActionsAndFilterColumns(ReadResult & r /// Check if the PREWHERE column is needed if (!result.columns.empty()) { - if (prewhere_info->remove_prewhere_column) + if (prewhere_info->remove_column) result.columns.erase(result.columns.begin() + prewhere_column_pos); else result.columns[prewhere_column_pos] = - getSampleBlock().getByName(prewhere_info->prewhere_column_name).type-> + getSampleBlock().getByName(prewhere_info->column_name).type-> createColumnConst(result.num_rows, 1u)->convertToFullColumnIfConst(); } } /// Filter in WHERE instead else { - if (prewhere_info->remove_prewhere_column) + if (prewhere_info->remove_column) result.columns.erase(result.columns.begin() + prewhere_column_pos); else { - auto type = getSampleBlock().getByName(prewhere_info->prewhere_column_name).type; + auto type = getSampleBlock().getByName(prewhere_info->column_name).type; ColumnWithTypeAndName col(result.getFilterHolder()->convertToFullColumnIfConst(), std::make_shared(), ""); result.columns[prewhere_column_pos] = castColumn(col, type); result.clearFilter(); // Acting as a flag to not filter in PREWHERE diff --git a/src/Storages/MergeTree/MergeTreeRangeReader.h b/src/Storages/MergeTree/MergeTreeRangeReader.h index 5dc52a7c0b8..057469746c6 100644 --- a/src/Storages/MergeTree/MergeTreeRangeReader.h +++ b/src/Storages/MergeTree/MergeTreeRangeReader.h @@ -18,6 +18,37 @@ using PrewhereInfoPtr = std::shared_ptr; class ExpressionActions; using ExpressionActionsPtr = std::shared_ptr; +//* +struct PrewhereExprStep +{ + ExpressionActionsPtr actions; + String column_name; + bool remove_column = false; + bool need_filter = false; +}; + +/// The same as PrewhereInfo, but with ExpressionActions instead of 
ActionsDAG +struct PrewhereExprInfo +{ +// /// Actions for row level security filter. Applied separately before prewhere_actions. +// /// This actions are separate because prewhere condition should not be executed over filtered rows. +// ExpressionActionsPtr row_level_filter; +// /// Actions which are executed on block in order to get filter column for prewhere step. +// ExpressionActionsPtr prewhere_actions; +// String row_level_column_name; +// String prewhere_column_name; +// bool remove_prewhere_column = false; +// bool need_filter = false; + + std::vector steps; + +///// PrewhereExprStep deleted_row_filter; +///// PrewhereExprStep row_level_filter; +///// PrewhereExprStep prewhere; +}; + +/*/ + /// The same as PrewhereInfo, but with ExpressionActions instead of ActionsDAG struct PrewhereExprInfo { @@ -31,6 +62,7 @@ struct PrewhereExprInfo bool remove_prewhere_column = false; bool need_filter = false; }; +//*/ /// MergeTreeReader iterator which allows sequential reading for arbitrary number of rows between pairs of marks in the same part. /// Stores reading state, which can be inside granule. Can skip rows in current granule and start reading from next mark. @@ -41,7 +73,7 @@ public: MergeTreeRangeReader( IMergeTreeReader * merge_tree_reader_, MergeTreeRangeReader * prev_reader_, - const PrewhereExprInfo * prewhere_info_, + const PrewhereExprStep * prewhere_info_, bool last_reader_in_chain_, const Names & non_const_virtual_column_names); @@ -251,7 +283,7 @@ private: IMergeTreeReader * merge_tree_reader = nullptr; const MergeTreeIndexGranularity * index_granularity = nullptr; MergeTreeRangeReader * prev_reader = nullptr; /// If not nullptr, read from prev_reader firstly. 
- const PrewhereExprInfo * prewhere_info; + const PrewhereExprStep * prewhere_info; Stream stream; diff --git a/src/Storages/MergeTree/MergeTreeReadPool.cpp b/src/Storages/MergeTree/MergeTreeReadPool.cpp index 89e021497ac..d44d250149e 100644 --- a/src/Storages/MergeTree/MergeTreeReadPool.cpp +++ b/src/Storages/MergeTree/MergeTreeReadPool.cpp @@ -142,8 +142,8 @@ MergeTreeReadTaskPtr MergeTreeReadPool::getTask(size_t min_marks_to_read, size_t return std::make_unique( part.data_part, ranges_to_get_from_part, part.part_index_in_query, ordered_names, - per_part.column_name_set, per_part.task_columns.columns, per_part.task_columns.pre_columns, - prewhere_info && prewhere_info->remove_prewhere_column, per_part.task_columns.should_reorder, std::move(curr_task_size_predictor)); + per_part.column_name_set, per_part.task_columns, + prewhere_info && prewhere_info->remove_prewhere_column, std::move(curr_task_size_predictor)); } Block MergeTreeReadPool::getHeader() const diff --git a/src/Storages/MergeTree/MergeTreeReverseSelectProcessor.cpp b/src/Storages/MergeTree/MergeTreeReverseSelectProcessor.cpp index 1a2ab8bff5b..84548d357b7 100644 --- a/src/Storages/MergeTree/MergeTreeReverseSelectProcessor.cpp +++ b/src/Storages/MergeTree/MergeTreeReverseSelectProcessor.cpp @@ -31,8 +31,8 @@ try task = std::make_unique( data_part, mark_ranges_for_task, part_index_in_query, ordered_names, column_name_set, - task_columns.columns, task_columns.pre_columns, prewhere_info && prewhere_info->remove_prewhere_column, - task_columns.should_reorder, std::move(size_predictor)); + task_columns, prewhere_info && prewhere_info->remove_prewhere_column, + std::move(size_predictor)); return true; } diff --git a/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp b/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp index 1d41c2c254d..e9539a5cf59 100644 --- a/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp +++ b/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp @@ -67,9 +67,13 @@ void 
MergeTreeSelectProcessor::initializeReaders() all_mark_ranges, owned_uncompressed_cache.get(), owned_mark_cache.get(), reader_settings, {}, {}); if (prewhere_info) - pre_reader = data_part->getReader(task_columns.pre_columns, storage_snapshot->getMetadataForQuery(), - all_mark_ranges, owned_uncompressed_cache.get(), owned_mark_cache.get(), reader_settings, {}, {}); - + { + for (const auto & pre_columns_for_step : task_columns.pre_columns) + { + pre_reader_for_step.push_back(data_part->getReader(pre_columns_for_step, storage_snapshot->getMetadataForQuery(), + all_mark_ranges, owned_uncompressed_cache.get(), owned_mark_cache.get(), reader_settings, {}, {})); + } + } } @@ -80,7 +84,7 @@ void MergeTreeSelectProcessor::finish() * buffers don't waste memory. */ reader.reset(); - pre_reader.reset(); + pre_reader_for_step.clear(); data_part.reset(); } diff --git a/src/Storages/MergeTree/MergeTreeThreadSelectProcessor.cpp b/src/Storages/MergeTree/MergeTreeThreadSelectProcessor.cpp index 063f018b1a4..0c1bd239076 100644 --- a/src/Storages/MergeTree/MergeTreeThreadSelectProcessor.cpp +++ b/src/Storages/MergeTree/MergeTreeThreadSelectProcessor.cpp @@ -111,14 +111,19 @@ void MergeTreeThreadSelectProcessor::finalizeNewTask() owned_uncompressed_cache = storage.getContext()->getUncompressedCache(); owned_mark_cache = storage.getContext()->getMarkCache(); - reader = task->data_part->getReader(task->columns, metadata_snapshot, task->mark_ranges, + reader = task->data_part->getReader(task->task_columns.columns, metadata_snapshot, task->mark_ranges, owned_uncompressed_cache.get(), owned_mark_cache.get(), reader_settings, IMergeTreeReader::ValueSizeMap{}, profile_callback); if (prewhere_info) - pre_reader = task->data_part->getReader(task->pre_columns, metadata_snapshot, task->mark_ranges, - owned_uncompressed_cache.get(), owned_mark_cache.get(), reader_settings, - IMergeTreeReader::ValueSizeMap{}, profile_callback); + { + for (const auto & pre_columns_per_step : 
task->task_columns.pre_columns) + { + pre_reader_for_step.push_back(task->data_part->getReader(pre_columns_per_step, metadata_snapshot, task->mark_ranges, + owned_uncompressed_cache.get(), owned_mark_cache.get(), reader_settings, + IMergeTreeReader::ValueSizeMap{}, profile_callback)); + } + } } else { @@ -126,14 +131,19 @@ void MergeTreeThreadSelectProcessor::finalizeNewTask() if (part_name != last_readed_part_name) { /// retain avg_value_size_hints - reader = task->data_part->getReader(task->columns, metadata_snapshot, task->mark_ranges, + reader = task->data_part->getReader(task->task_columns.columns, metadata_snapshot, task->mark_ranges, owned_uncompressed_cache.get(), owned_mark_cache.get(), reader_settings, reader->getAvgValueSizeHints(), profile_callback); if (prewhere_info) - pre_reader = task->data_part->getReader(task->pre_columns, metadata_snapshot, task->mark_ranges, - owned_uncompressed_cache.get(), owned_mark_cache.get(), reader_settings, - reader->getAvgValueSizeHints(), profile_callback); + { + for (const auto & pre_columns_per_step : task->task_columns.pre_columns) + { + pre_reader_for_step.push_back(task->data_part->getReader(pre_columns_per_step, metadata_snapshot, task->mark_ranges, + owned_uncompressed_cache.get(), owned_mark_cache.get(), reader_settings, + reader->getAvgValueSizeHints(), profile_callback)); + } + } } } @@ -144,7 +154,7 @@ void MergeTreeThreadSelectProcessor::finalizeNewTask() void MergeTreeThreadSelectProcessor::finish() { reader.reset(); - pre_reader.reset(); + pre_reader_for_step.clear(); } diff --git a/src/Storages/SelectQueryInfo.h b/src/Storages/SelectQueryInfo.h index bdb4c392c48..23b592ce7ac 100644 --- a/src/Storages/SelectQueryInfo.h +++ b/src/Storages/SelectQueryInfo.h @@ -47,6 +47,42 @@ using MergeTreeDataSelectAnalysisResultPtr = std::shared_ptr; +/* +struct PrewhereStep +{ + ActionsDAGPtr prewhere_actions; + String prewhere_column_name; + bool remove_prewhere_column = false; + bool need_filter = false; + + explicit 
PrewhereStep(ActionsDAGPtr prewhere_actions_, String prewhere_column_name_) + : prewhere_actions(std::move(prewhere_actions_)), prewhere_column_name(std::move(prewhere_column_name_)) {} + + std::string dump() const; +}; + +struct PrewhereInfo +{ +// /// Actions for row level security filter. Applied separately before prewhere_actions. +// /// This actions are separate because prewhere condition should not be executed over filtered rows. +// ActionsDAGPtr row_level_filter; +// /// Actions which are executed on block in order to get filter column for prewhere step. +// ActionsDAGPtr prewhere_actions; +// String row_level_column_name; +// String prewhere_column_name; +// bool remove_prewhere_column = false; +// bool need_filter = false; + + std::vector steps; + + PrewhereInfo() = default; +// explicit PrewhereInfo(ActionsDAGPtr prewhere_actions_, String prewhere_column_name_) +// : prewhere_actions(std::move(prewhere_actions_)), prewhere_column_name(std::move(prewhere_column_name_)) {} + + std::string dump() const; +}; +/*/ + struct PrewhereInfo { /// Actions for row level security filter. Applied separately before prewhere_actions. @@ -65,6 +101,7 @@ struct PrewhereInfo std::string dump() const; }; +//*/ /// Helper struct to store all the information about the filter expression. 
struct FilterInfo From 4e426c63cc53165dfd735176e7f1b2a0098d5094 Mon Sep 17 00:00:00 2001 From: Alexander Gololobov <440544+davenger@users.noreply.github.com> Date: Tue, 7 Jun 2022 23:00:34 +0200 Subject: [PATCH 023/101] Debuging test failures --- .../MergeTree/MergeTreeBaseSelectProcessor.cpp | 5 +++++ .../MergeTree/MergeTreeBlockReadUtils.h | 11 +++++++++++ src/Storages/MergeTree/MergeTreeRangeReader.cpp | 17 +++++++++++++++++ src/Storages/MergeTree/MergeTreeRangeReader.h | 2 ++ .../MergeTree/MergeTreeSelectProcessor.cpp | 3 +++ .../MergeTreeThreadSelectProcessor.cpp | 7 +++++++ 6 files changed, 45 insertions(+) diff --git a/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp b/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp index bdfda6f3d5f..7a5ed63fa0d 100644 --- a/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp +++ b/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp @@ -104,6 +104,11 @@ MergeTreeBaseSelectProcessor::MergeTreeBaseSelectProcessor( prewhere_actions->steps.emplace_back(std::move(prewhere_step)); + +// std::cerr +// << "PREWHERE ========================\n" +// << prewhere_actions->dump() +// << "========================\n\n"; } } diff --git a/src/Storages/MergeTree/MergeTreeBlockReadUtils.h b/src/Storages/MergeTree/MergeTreeBlockReadUtils.h index eea8c227e14..a3e754f83c3 100644 --- a/src/Storages/MergeTree/MergeTreeBlockReadUtils.h +++ b/src/Storages/MergeTree/MergeTreeBlockReadUtils.h @@ -38,6 +38,17 @@ struct MergeTreeReadTaskColumns std::vector pre_columns; /// resulting block may require reordering in accordance with `ordered_names` bool should_reorder = false; + + std::string dump() const + { + std::ostringstream s; + for (size_t i = 0; i < pre_columns.size(); ++i) + { + s << "STEP " << i << ": " << pre_columns[i].toString() << "\n"; + } + s << "COLUMNS: " << columns.toString() << "\n"; + return s.str(); + } }; /// A batch of work for MergeTreeThreadSelectProcessor diff --git 
a/src/Storages/MergeTree/MergeTreeRangeReader.cpp b/src/Storages/MergeTree/MergeTreeRangeReader.cpp index a51afed86cf..a20414d9616 100644 --- a/src/Storages/MergeTree/MergeTreeRangeReader.cpp +++ b/src/Storages/MergeTree/MergeTreeRangeReader.cpp @@ -1,3 +1,4 @@ +#include #include #include #include @@ -1300,4 +1301,20 @@ void MergeTreeRangeReader::executePrewhereActionsAndFilterColumns(ReadResult & r } } +std::string PrewhereExprInfo::dump() const +{ + std::ostringstream s; + + for (size_t i = 0; i < steps.size(); ++i) + { + s << "STEP " << i << ":\n" + << " ACTIONS: " << steps[i].actions->dumpActions() << "\n" + << " COLUMN: " << steps[i].column_name << "\n" + << " REMOVE_COLUMN: " << steps[i].remove_column << "\n" + << " NEED_FILTER: " << steps[i].need_filter << "\n"; + } + + return s.str(); +} + } diff --git a/src/Storages/MergeTree/MergeTreeRangeReader.h b/src/Storages/MergeTree/MergeTreeRangeReader.h index 057469746c6..15712a77797 100644 --- a/src/Storages/MergeTree/MergeTreeRangeReader.h +++ b/src/Storages/MergeTree/MergeTreeRangeReader.h @@ -45,6 +45,8 @@ struct PrewhereExprInfo ///// PrewhereExprStep deleted_row_filter; ///// PrewhereExprStep row_level_filter; ///// PrewhereExprStep prewhere; + + std::string dump() const; }; /*/ diff --git a/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp b/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp index e9539a5cf59..eb4746cbf04 100644 --- a/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp +++ b/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp @@ -50,6 +50,9 @@ MergeTreeSelectProcessor::MergeTreeSelectProcessor( void MergeTreeSelectProcessor::initializeReaders() { +pre_reader_for_step.clear(); // is it being reused??? 
+ + task_columns = getReadTaskColumns( storage, storage_snapshot, data_part, required_columns, prewhere_info, /*with_subcolumns=*/ true); diff --git a/src/Storages/MergeTree/MergeTreeThreadSelectProcessor.cpp b/src/Storages/MergeTree/MergeTreeThreadSelectProcessor.cpp index 0c1bd239076..47615ae6e3b 100644 --- a/src/Storages/MergeTree/MergeTreeThreadSelectProcessor.cpp +++ b/src/Storages/MergeTree/MergeTreeThreadSelectProcessor.cpp @@ -105,6 +105,9 @@ void MergeTreeThreadSelectProcessor::finalizeNewTask() auto profile_callback = [this](ReadBufferFromFileBase::ProfileInfo info_) { pool->profileFeedback(info_); }; const auto & metadata_snapshot = storage_snapshot->metadata; +//std::cerr << "==============TASK:==============\n" << task->task_columns.dump() << "\n"; +//std::cerr << "pre_reader_for_step.size() " << pre_reader_for_step.size() << "\n\n"; + if (!reader) { if (use_uncompressed_cache) @@ -115,6 +118,8 @@ void MergeTreeThreadSelectProcessor::finalizeNewTask() owned_uncompressed_cache.get(), owned_mark_cache.get(), reader_settings, IMergeTreeReader::ValueSizeMap{}, profile_callback); +pre_reader_for_step.clear(); // is it being reused??? + if (prewhere_info) { for (const auto & pre_columns_per_step : task->task_columns.pre_columns) @@ -135,6 +140,8 @@ void MergeTreeThreadSelectProcessor::finalizeNewTask() owned_uncompressed_cache.get(), owned_mark_cache.get(), reader_settings, reader->getAvgValueSizeHints(), profile_callback); +pre_reader_for_step.clear(); // is it being reused??? 
+ if (prewhere_info) { for (const auto & pre_columns_per_step : task->task_columns.pre_columns) From a9e3b8d29ee1ab89f6ea4da5a92ef2bda73b226f Mon Sep 17 00:00:00 2001 From: Alexander Gololobov <440544+davenger@users.noreply.github.com> Date: Thu, 9 Jun 2022 00:00:36 +0200 Subject: [PATCH 024/101] Don't read the same columns again --- .../MergeTree/MergeTreeBlockReadUtils.cpp | 30 ++++++++++--------- .../MergeTree/MergeTreeRangeReader.cpp | 6 ++++ 2 files changed, 22 insertions(+), 14 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp b/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp index fd0f9c20d3f..ed6d5afaf09 100644 --- a/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp +++ b/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp @@ -295,35 +295,37 @@ MergeTreeReadTaskColumns getReadTaskColumns( /// 1. Columns for row level filter if (prewhere_info->row_level_filter) { - pre_column_names = prewhere_info->row_level_filter->getRequiredColumnsNames(); + Names row_filter_column_names = prewhere_info->row_level_filter->getRequiredColumnsNames(); ////// HACK!!! - result.pre_columns.push_back(storage_snapshot->getColumnsByNames(options, pre_column_names)); + result.pre_columns.push_back(storage_snapshot->getColumnsByNames(options, row_filter_column_names)); ////////////// - pre_name_set.insert(pre_column_names.begin(), pre_column_names.end()); + pre_name_set.insert(row_filter_column_names.begin(), row_filter_column_names.end()); // all_pre_columns.insert(pre_column_names.begin(), pre_column_names.end()); } /// 2. 
Columns for prewhere - pre_column_names.clear(); - for (const auto & name : prewhere_info->prewhere_actions->getRequiredColumnsNames()) + Names all_pre_column_names = prewhere_info->prewhere_actions->getRequiredColumnsNames(); + +// if (pre_column_names.empty()) +// pre_column_names.push_back(column_names[0]); + + const auto injected_pre_columns = injectRequiredColumns( + storage, storage_snapshot, data_part, with_subcolumns, all_pre_column_names); + + if (!injected_pre_columns.empty()) + should_reorder = true; + + for (const auto & name : all_pre_column_names) { if (pre_name_set.contains(name)) continue; pre_column_names.push_back(name); + pre_name_set.insert(name); } - if (pre_column_names.empty()) - pre_column_names.push_back(column_names[0]); - - const auto injected_pre_columns = injectRequiredColumns( - storage, storage_snapshot, data_part, with_subcolumns, pre_column_names); - - if (!injected_pre_columns.empty()) - should_reorder = true; - Names post_column_names; for (const auto & name : column_names) diff --git a/src/Storages/MergeTree/MergeTreeRangeReader.cpp b/src/Storages/MergeTree/MergeTreeRangeReader.cpp index a20414d9616..f78e105010d 100644 --- a/src/Storages/MergeTree/MergeTreeRangeReader.cpp +++ b/src/Storages/MergeTree/MergeTreeRangeReader.cpp @@ -1003,6 +1003,12 @@ Columns MergeTreeRangeReader::continueReadingChain(ReadResult & result, size_t & Columns columns; num_rows = 0; +/////////////// +// HACK!!: no columns need to be read at this step? (only more filtering) + if (merge_tree_reader->getColumns().empty()) + return columns; +/////////////// + if (result.rowsPerGranule().empty()) { /// If zero rows were read on prev step, than there is no more rows to read. 
From 935c9e454fd55be0dd7cf8c8495da4b9b9a51607 Mon Sep 17 00:00:00 2001 From: Alexander Gololobov <440544+davenger@users.noreply.github.com> Date: Thu, 9 Jun 2022 16:04:38 +0200 Subject: [PATCH 025/101] Test case for no columns read in row level filter --- ...9_no_columns_in_row_level_filter.reference | 8 +++++ .../02319_no_columns_in_row_level_filter.sql | 31 +++++++++++++++++++ 2 files changed, 39 insertions(+) create mode 100644 tests/queries/0_stateless/02319_no_columns_in_row_level_filter.reference create mode 100644 tests/queries/0_stateless/02319_no_columns_in_row_level_filter.sql diff --git a/tests/queries/0_stateless/02319_no_columns_in_row_level_filter.reference b/tests/queries/0_stateless/02319_no_columns_in_row_level_filter.reference new file mode 100644 index 00000000000..55d013f68b4 --- /dev/null +++ b/tests/queries/0_stateless/02319_no_columns_in_row_level_filter.reference @@ -0,0 +1,8 @@ +1000000 +0 +0 +0 +400000 +195431 +195431 +5923 diff --git a/tests/queries/0_stateless/02319_no_columns_in_row_level_filter.sql b/tests/queries/0_stateless/02319_no_columns_in_row_level_filter.sql new file mode 100644 index 00000000000..4aa62ec8b8d --- /dev/null +++ b/tests/queries/0_stateless/02319_no_columns_in_row_level_filter.sql @@ -0,0 +1,31 @@ +DROP ROW POLICY IF EXISTS test_filter_policy ON test_table; +DROP ROW POLICY IF EXISTS test_filter_policy_2 ON test_table; +DROP TABLE IF EXISTS test_table; + +CREATE TABLE test_table (`n` UInt64, `s` String) +ENGINE = MergeTree +PRIMARY KEY n ORDER BY n; + +INSERT INTO test_table SELECT number, concat('some string ', CAST(number, 'String')) FROM numbers(1000000); + +-- Create row policy that doesn't use any column +CREATE ROW POLICY test_filter_policy ON test_table USING False TO ALL; + +-- Run query under default user so that always false row_level_filter is added that doesn't require any columns +SELECT count(1) FROM test_table; +SELECT count(1) FROM test_table PREWHERE (n % 8192) < 4000; +SELECT count(1) FROM 
test_table WHERE (n % 8192) < 4000; +SELECT count(1) FROM test_table PREWHERE (n % 8192) < 4000 WHERE (n % 33) == 0; + +-- Add policy for default user that will read a column +CREATE ROW POLICY test_filter_policy_2 ON test_table USING (n % 5) >= 3 TO default; + +-- Run query under default user that needs the same column as PREWHERE and WHERE +SELECT count(1) FROM test_table; +SELECT count(1) FROM test_table PREWHERE (n % 8192) < 4000; +SELECT count(1) FROM test_table WHERE (n % 8192) < 4000; +SELECT count(1) FROM test_table PREWHERE (n % 8192) < 4000 WHERE (n % 33) == 0; + +DROP TABLE test_table; +DROP ROW POLICY test_filter_policy ON test_table; +DROP ROW POLICY test_filter_policy_2 ON test_table; From e5c0ebc7b5f574c678576b798ba3dcdec1f0adb3 Mon Sep 17 00:00:00 2001 From: Alexander Gololobov <440544+davenger@users.noreply.github.com> Date: Fri, 10 Jun 2022 11:44:53 +0200 Subject: [PATCH 026/101] Handle read from wide part when no columns are requested --- src/Storages/MergeTree/MergeTreeReaderWide.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/MergeTreeReaderWide.cpp b/src/Storages/MergeTree/MergeTreeReaderWide.cpp index 93c1e23884a..7517463e8a8 100644 --- a/src/Storages/MergeTree/MergeTreeReaderWide.cpp +++ b/src/Storages/MergeTree/MergeTreeReaderWide.cpp @@ -67,9 +67,13 @@ size_t MergeTreeReaderWide::readRows( size_t read_rows = 0; try { - size_t num_columns = columns.size(); + size_t num_columns = res_columns.size(); checkNumberOfColumns(num_columns); + /// TODO: is this ok to request no columns? 
+ if (num_columns == 0) + return max_rows_to_read; + std::unordered_map caches; std::unordered_set prefetched_streams; From ba89c3954cd9c009ed275a89806849558b7f0eb6 Mon Sep 17 00:00:00 2001 From: Alexander Gololobov <440544+davenger@users.noreply.github.com> Date: Fri, 10 Jun 2022 18:32:37 +0200 Subject: [PATCH 027/101] Do not add the same vitrual if it has been added by prev_reader --- src/Storages/MergeTree/MergeTreeRangeReader.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/Storages/MergeTree/MergeTreeRangeReader.cpp b/src/Storages/MergeTree/MergeTreeRangeReader.cpp index f78e105010d..a9f96355c22 100644 --- a/src/Storages/MergeTree/MergeTreeRangeReader.cpp +++ b/src/Storages/MergeTree/MergeTreeRangeReader.cpp @@ -1159,6 +1159,9 @@ void MergeTreeRangeReader::executePrewhereActionsAndFilterColumns(ReadResult & r for (const auto & column_name : non_const_virtual_column_names) { + if (block.has(column_name)) + continue; + if (column_name == "_part_offset") block.insert({result.columns[pos], std::make_shared(), column_name}); else From b3922461b3fd16ea2289d81e0bd9b903e3b68867 Mon Sep 17 00:00:00 2001 From: Alexander Gololobov <440544+davenger@users.noreply.github.com> Date: Fri, 10 Jun 2022 18:33:43 +0200 Subject: [PATCH 028/101] Properly handle empty actions --- src/Storages/MergeTree/MergeTreeRangeReader.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeRangeReader.cpp b/src/Storages/MergeTree/MergeTreeRangeReader.cpp index a9f96355c22..81df11b2545 100644 --- a/src/Storages/MergeTree/MergeTreeRangeReader.cpp +++ b/src/Storages/MergeTree/MergeTreeRangeReader.cpp @@ -1187,7 +1187,8 @@ void MergeTreeRangeReader::executePrewhereActionsAndFilterColumns(ReadResult & r block.setColumns(columns); } */ - prewhere_info->actions->execute(block); + if (prewhere_info->actions) + prewhere_info->actions->execute(block); prewhere_column_pos = block.getPositionByName(prewhere_info->column_name); @@ -1317,7 
+1318,7 @@ std::string PrewhereExprInfo::dump() const for (size_t i = 0; i < steps.size(); ++i) { s << "STEP " << i << ":\n" - << " ACTIONS: " << steps[i].actions->dumpActions() << "\n" + << " ACTIONS: " << (steps[i].actions ? steps[i].actions->dumpActions() : "nullptr") << "\n" << " COLUMN: " << steps[i].column_name << "\n" << " REMOVE_COLUMN: " << steps[i].remove_column << "\n" << " NEED_FILTER: " << steps[i].need_filter << "\n"; From a5b3f83181c96afa7130b3dbe4a53dfa7f274947 Mon Sep 17 00:00:00 2001 From: Alexander Gololobov <440544+davenger@users.noreply.github.com> Date: Mon, 13 Jun 2022 11:57:55 +0200 Subject: [PATCH 029/101] Always remove row filter column --- src/Interpreters/InterpreterSelectQuery.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 98a06af88c9..77ed83446b1 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -1176,7 +1176,7 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional

row_level_filter, expressions.prewhere_info->row_level_column_name, - true);//false); + true); row_level_filter_step->setStepDescription("Row-level security filter (PREWHERE)"); query_plan.addStep(std::move(row_level_filter_step)); From 5427d5c51b7679f9f86426e1a41c8705a0866c99 Mon Sep 17 00:00:00 2001 From: Alexander Gololobov <440544+davenger@users.noreply.github.com> Date: Mon, 13 Jun 2022 11:59:00 +0200 Subject: [PATCH 030/101] Apply filter at row level security step --- src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp b/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp index 7a5ed63fa0d..26ea86001d4 100644 --- a/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp +++ b/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp @@ -80,7 +80,7 @@ MergeTreeBaseSelectProcessor::MergeTreeBaseSelectProcessor( .actions = std::make_shared(prewhere_info->row_level_filter, actions_settings), .column_name = prewhere_info->row_level_column_name, .remove_column = true, - .need_filter = true //prewhere_info->need_filter // ???? 
+ .need_filter = true }; prewhere_actions->steps.emplace_back(std::move(row_level_filter_step)); From aea5bc7527fceb6f791527d8e440f15f4d568314 Mon Sep 17 00:00:00 2001 From: Alexander Gololobov <440544+davenger@users.noreply.github.com> Date: Mon, 13 Jun 2022 12:06:28 +0200 Subject: [PATCH 031/101] Removed unused should_reorder flag --- src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp | 10 +++------- src/Storages/MergeTree/MergeTreeBlockReadUtils.h | 4 ---- 2 files changed, 3 insertions(+), 11 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp b/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp index ed6d5afaf09..aa6ef1a35f1 100644 --- a/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp +++ b/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp @@ -276,8 +276,8 @@ MergeTreeReadTaskColumns getReadTaskColumns( Names pre_column_names; /// inject columns required for defaults evaluation - bool should_reorder = !injectRequiredColumns( - storage, storage_snapshot, data_part, with_subcolumns, column_names).empty(); + injectRequiredColumns( + storage, storage_snapshot, data_part, with_subcolumns, column_names); MergeTreeReadTaskColumns result; auto options = GetColumnsOptions(GetColumnsOptions::All).withExtendedObjects(); @@ -315,9 +315,6 @@ MergeTreeReadTaskColumns getReadTaskColumns( const auto injected_pre_columns = injectRequiredColumns( storage, storage_snapshot, data_part, with_subcolumns, all_pre_column_names); - if (!injected_pre_columns.empty()) - should_reorder = true; - for (const auto & name : all_pre_column_names) { if (pre_name_set.contains(name)) @@ -342,9 +339,8 @@ MergeTreeReadTaskColumns getReadTaskColumns( result.pre_columns.push_back(storage_snapshot->getColumnsByNames(options, pre_column_names)); ////////////// - /// 3. Rest of the requested columns + /// 3. 
Rest of the requested columns result.columns = storage_snapshot->getColumnsByNames(options, column_names); - result.should_reorder = should_reorder; return result; } diff --git a/src/Storages/MergeTree/MergeTreeBlockReadUtils.h b/src/Storages/MergeTree/MergeTreeBlockReadUtils.h index a3e754f83c3..d0cdedb09c0 100644 --- a/src/Storages/MergeTree/MergeTreeBlockReadUtils.h +++ b/src/Storages/MergeTree/MergeTreeBlockReadUtils.h @@ -36,8 +36,6 @@ struct MergeTreeReadTaskColumns NamesAndTypesList columns; /// column names to read during each PREWHERE step std::vector pre_columns; - /// resulting block may require reordering in accordance with `ordered_names` - bool should_reorder = false; std::string dump() const { @@ -73,8 +71,6 @@ struct MergeTreeReadTask // const NamesAndTypesList & pre_columns; /// should PREWHERE column be returned to requesting side? const bool remove_prewhere_column; -// /// resulting block may require reordering in accordance with `ordered_names` -// const bool should_reorder; /// Used to satistfy preferred_block_size_bytes limitation MergeTreeBlockSizePredictorPtr size_predictor; /// Used to save current range processing status From b629833ef9aa71d091892d0284f7c773d6ea88e1 Mon Sep 17 00:00:00 2001 From: Alexander Gololobov <440544+davenger@users.noreply.github.com> Date: Mon, 13 Jun 2022 12:32:24 +0200 Subject: [PATCH 032/101] Test cases to check that row level filter is applied before prewhere actions are executed --- .../02319_no_columns_in_row_level_filter.reference | 3 +++ .../0_stateless/02319_no_columns_in_row_level_filter.sql | 5 +++++ 2 files changed, 8 insertions(+) diff --git a/tests/queries/0_stateless/02319_no_columns_in_row_level_filter.reference b/tests/queries/0_stateless/02319_no_columns_in_row_level_filter.reference index 55d013f68b4..c0911ffc598 100644 --- a/tests/queries/0_stateless/02319_no_columns_in_row_level_filter.reference +++ b/tests/queries/0_stateless/02319_no_columns_in_row_level_filter.reference @@ -6,3 +6,6 @@ 
195431 195431 5923 +200000 +200000 +6061 diff --git a/tests/queries/0_stateless/02319_no_columns_in_row_level_filter.sql b/tests/queries/0_stateless/02319_no_columns_in_row_level_filter.sql index 4aa62ec8b8d..e6bc475b081 100644 --- a/tests/queries/0_stateless/02319_no_columns_in_row_level_filter.sql +++ b/tests/queries/0_stateless/02319_no_columns_in_row_level_filter.sql @@ -26,6 +26,11 @@ SELECT count(1) FROM test_table PREWHERE (n % 8192) < 4000; SELECT count(1) FROM test_table WHERE (n % 8192) < 4000; SELECT count(1) FROM test_table PREWHERE (n % 8192) < 4000 WHERE (n % 33) == 0; +-- Run queries that have division by zero if row level filter isn't applied before prewhere +SELECT count(1) FROM test_table PREWHERE 7 / (n % 5) > 2; +SELECT count(1) FROM test_table WHERE 7 / (n % 5) > 2; +SELECT count(1) FROM test_table PREWHERE 7 / (n % 5) > 2 WHERE (n % 33) == 0; + DROP TABLE test_table; DROP ROW POLICY test_filter_policy ON test_table; DROP ROW POLICY test_filter_policy_2 ON test_table; From dbc6d1a159aded07ed95904b38c005e9d916e2a5 Mon Sep 17 00:00:00 2001 From: Alexander Gololobov <440544+davenger@users.noreply.github.com> Date: Mon, 13 Jun 2022 15:00:26 +0200 Subject: [PATCH 033/101] Cleanups --- .../MergeTreeBaseSelectProcessor.cpp | 54 ++----- .../MergeTree/MergeTreeBlockReadUtils.cpp | 35 +++-- .../MergeTree/MergeTreeBlockReadUtils.h | 20 +-- .../MergeTree/MergeTreeRangeReader.cpp | 136 ++++++------------ src/Storages/MergeTree/MergeTreeRangeReader.h | 36 +---- .../MergeTree/MergeTreeReaderWide.cpp | 1 - .../MergeTree/MergeTreeSelectProcessor.cpp | 5 +- .../MergeTreeThreadSelectProcessor.cpp | 9 +- src/Storages/SelectQueryInfo.h | 37 ----- 9 files changed, 74 insertions(+), 259 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp b/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp index 26ea86001d4..0d440ea7a8a 100644 --- a/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp +++ 
b/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp @@ -84,15 +84,7 @@ MergeTreeBaseSelectProcessor::MergeTreeBaseSelectProcessor( }; prewhere_actions->steps.emplace_back(std::move(row_level_filter_step)); - -// prewhere_actions->row_level_filter = std::make_shared(prewhere_info->row_level_filter, actions_settings); } -// prewhere_actions->row_level_column_name = prewhere_info->row_level_column_name; - -// prewhere_actions->prewhere_actions = std::make_shared(prewhere_info->prewhere_actions, actions_settings); -// prewhere_actions->prewhere_column_name = prewhere_info->prewhere_column_name; -// prewhere_actions->remove_prewhere_column = prewhere_info->remove_prewhere_column; -// prewhere_actions->need_filter = prewhere_info->need_filter; PrewhereExprStep prewhere_step { @@ -103,12 +95,6 @@ MergeTreeBaseSelectProcessor::MergeTreeBaseSelectProcessor( }; prewhere_actions->steps.emplace_back(std::move(prewhere_step)); - - -// std::cerr -// << "PREWHERE ========================\n" -// << prewhere_actions->dump() -// << "========================\n\n"; } } @@ -236,7 +222,11 @@ void MergeTreeBaseSelectProcessor::initializeRangeReaders(MergeTreeReadTask & cu if (prewhere_info) { - assert(prewhere_actions->steps.size() == pre_reader_for_step.size()); + if (prewhere_actions->steps.size() != pre_reader_for_step.size()) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "PREWHERE steps count mismatch, actions: {}, readers: {}", + prewhere_actions->steps.size(), pre_reader_for_step.size()); + for (size_t i = 0; i < prewhere_actions->steps.size(); ++i) { @@ -244,7 +234,6 @@ void MergeTreeBaseSelectProcessor::initializeRangeReaders(MergeTreeReadTask & cu current_task.pre_range_reader.push_back( MergeTreeRangeReader(pre_reader_for_step[i].get(), prev_reader, &prewhere_actions->steps[i], last_reader, non_const_virtual_column_names)); - prev_reader = ¤t_task.pre_range_reader.back(); } @@ -256,41 +245,14 @@ void MergeTreeBaseSelectProcessor::initializeRangeReaders(MergeTreeReadTask 
& cu } else { - // HACK!! - // If all columns are read by pre_range_readers than move last pre_range_reader into range_reader + /// If all columns are read by pre_range_readers than move last pre_range_reader into range_reader current_task.range_reader = std::move(current_task.pre_range_reader.back()); current_task.pre_range_reader.pop_back(); } - -/* - if (prewhere_info) - { - if (reader->getColumns().empty()) - { - current_task.range_reader = MergeTreeRangeReader(pre_reader.get(), nullptr, prewhere_actions.get(), true, non_const_virtual_column_names); - } - else - { - MergeTreeRangeReader * pre_reader_ptr = nullptr; - if (pre_reader != nullptr) - { - current_task.pre_range_reader = MergeTreeRangeReader(pre_reader.get(), nullptr, prewhere_actions.get(), false, non_const_virtual_column_names); - pre_reader_ptr = ¤t_task.pre_range_reader; - } - - current_task.range_reader = MergeTreeRangeReader(reader.get(), pre_reader_ptr, nullptr, true, non_const_virtual_column_names); - } - } - else - { - current_task.range_reader = MergeTreeRangeReader(reader.get(), nullptr, nullptr, true, non_const_virtual_column_names); - } -//*/ } static UInt64 estimateNumRows(const MergeTreeReadTask & current_task, UInt64 current_preferred_block_size_bytes, - UInt64 current_max_block_size_rows, UInt64 current_preferred_max_column_in_block_size_bytes, double min_filtration_ratio) - //, const MergeTreeRangeReader & current_reader) + UInt64 current_max_block_size_rows, UInt64 current_preferred_max_column_in_block_size_bytes, double min_filtration_ratio) { const MergeTreeRangeReader & current_reader = current_task.range_reader; @@ -339,7 +301,7 @@ Chunk MergeTreeBaseSelectProcessor::readFromPartImpl() const double min_filtration_ratio = 0.00001; UInt64 recommended_rows = estimateNumRows(*task, current_preferred_block_size_bytes, - current_max_block_size_rows, current_preferred_max_column_in_block_size_bytes, min_filtration_ratio);//, task->range_reader); + current_max_block_size_rows, 
current_preferred_max_column_in_block_size_bytes, min_filtration_ratio); UInt64 rows_to_read = std::max(static_cast(1), std::min(current_max_block_size_rows, recommended_rows)); auto read_result = task->range_reader.read(rows_to_read, task->mark_ranges); diff --git a/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp b/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp index aa6ef1a35f1..50f4c34f004 100644 --- a/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp +++ b/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp @@ -5,6 +5,9 @@ #include #include #include +#include +#include + #include @@ -284,34 +287,22 @@ MergeTreeReadTaskColumns getReadTaskColumns( if (with_subcolumns) options.withSubcolumns(); -// NameSet all_pre_columns; - if (prewhere_info) { NameSet pre_name_set; -// TODO: for each prewhere step - + /// Add column reading steps: /// 1. Columns for row level filter if (prewhere_info->row_level_filter) { Names row_filter_column_names = prewhere_info->row_level_filter->getRequiredColumnsNames(); - -////// HACK!!! result.pre_columns.push_back(storage_snapshot->getColumnsByNames(options, row_filter_column_names)); -////////////// - pre_name_set.insert(row_filter_column_names.begin(), row_filter_column_names.end()); - -// all_pre_columns.insert(pre_column_names.begin(), pre_column_names.end()); } /// 2. 
Columns for prewhere Names all_pre_column_names = prewhere_info->prewhere_actions->getRequiredColumnsNames(); -// if (pre_column_names.empty()) -// pre_column_names.push_back(column_names[0]); - const auto injected_pre_columns = injectRequiredColumns( storage, storage_snapshot, data_part, with_subcolumns, all_pre_column_names); @@ -323,7 +314,6 @@ MergeTreeReadTaskColumns getReadTaskColumns( pre_name_set.insert(name); } - Names post_column_names; for (const auto & name : column_names) if (!pre_name_set.contains(name)) @@ -332,16 +322,23 @@ MergeTreeReadTaskColumns getReadTaskColumns( column_names = post_column_names; } -// NamesAndTypesList all_columns; - - -////// HACK!!! result.pre_columns.push_back(storage_snapshot->getColumnsByNames(options, pre_column_names)); -////////////// /// 3. Rest of the requested columns result.columns = storage_snapshot->getColumnsByNames(options, column_names); return result; } + +std::string MergeTreeReadTaskColumns::dump() const +{ + WriteBufferFromOwnString s; + for (size_t i = 0; i < pre_columns.size(); ++i) + { + s << "STEP " << i << ": " << pre_columns[i].toString() << "\n"; + } + s << "COLUMNS: " << columns.toString() << "\n"; + return s.str(); +} + } diff --git a/src/Storages/MergeTree/MergeTreeBlockReadUtils.h b/src/Storages/MergeTree/MergeTreeBlockReadUtils.h index d0cdedb09c0..0a9a307fec5 100644 --- a/src/Storages/MergeTree/MergeTreeBlockReadUtils.h +++ b/src/Storages/MergeTree/MergeTreeBlockReadUtils.h @@ -37,16 +37,7 @@ struct MergeTreeReadTaskColumns /// column names to read during each PREWHERE step std::vector pre_columns; - std::string dump() const - { - std::ostringstream s; - for (size_t i = 0; i < pre_columns.size(); ++i) - { - s << "STEP " << i << ": " << pre_columns[i].toString() << "\n"; - } - s << "COLUMNS: " << columns.toString() << "\n"; - return s.str(); - } + std::string dump() const; }; /// A batch of work for MergeTreeThreadSelectProcessor @@ -62,13 +53,8 @@ struct MergeTreeReadTask const Names & 
ordered_names; /// used to determine whether column should be filtered during PREWHERE or WHERE const NameSet & column_name_set; - - const MergeTreeReadTaskColumns& task_columns; - -// /// column names to read during WHERE -// const NamesAndTypesList & columns; -// /// column names to read during PREWHERE -// const NamesAndTypesList & pre_columns; + /// column names to read during PREWHERE and WHERE + const MergeTreeReadTaskColumns & task_columns; /// should PREWHERE column be returned to requesting side? const bool remove_prewhere_column; /// Used to satistfy preferred_block_size_bytes limitation diff --git a/src/Storages/MergeTree/MergeTreeRangeReader.cpp b/src/Storages/MergeTree/MergeTreeRangeReader.cpp index 81df11b2545..eaecac06118 100644 --- a/src/Storages/MergeTree/MergeTreeRangeReader.cpp +++ b/src/Storages/MergeTree/MergeTreeRangeReader.cpp @@ -1,9 +1,10 @@ -#include #include #include #include #include #include +#include "IO/WriteBufferFromString.h" +#include "IO/Operators.h" #include #include #include @@ -44,7 +45,7 @@ static void filterColumns(Columns & columns, const IColumn::Filter & filter) } } } -/* + static void filterColumns(Columns & columns, const ColumnPtr & filter) { ConstantFilterDescription const_descr(*filter); @@ -63,7 +64,7 @@ static void filterColumns(Columns & columns, const ColumnPtr & filter) FilterDescription descr(*filter); filterColumns(columns, *descr.data); } -*/ + size_t MergeTreeRangeReader::ReadResult::getLastMark(const MergeTreeRangeReader::ReadResult::RangesInfo & ranges) { @@ -359,7 +360,6 @@ void MergeTreeRangeReader::ReadResult::setFilterConstFalse() num_rows = 0; } -/// void MergeTreeRangeReader::ReadResult::optimize(bool can_read_incomplete_granules, bool allow_filter_columns) { if (total_rows_per_granule == 0 || filter == nullptr) @@ -417,7 +417,6 @@ void MergeTreeRangeReader::ReadResult::optimize(bool can_read_incomplete_granule need_filter = true; } -/// For each read granule size_t 
MergeTreeRangeReader::ReadResult::countZeroTails(const IColumn::Filter & filter_vec, NumRows & zero_tails, bool can_read_incomplete_granules) const { zero_tails.resize(0); @@ -676,28 +675,12 @@ MergeTreeRangeReader::MergeTreeRangeReader( if (prewhere_info) { -// for (const auto & step : prewhere_info->steps) const auto & step = *prewhere_info; - { - if (step.actions) - step.actions->execute(sample_block, true); + if (step.actions) + step.actions->execute(sample_block, true); - if (step.remove_column) - sample_block.erase(step.column_name); - } - -/* if (prewhere_info->row_level_filter) - { - prewhere_info->row_level_filter->execute(sample_block, true); - sample_block.erase(prewhere_info->row_level_column_name); - } - - if (prewhere_info->prewhere_actions) - prewhere_info->prewhere_actions->execute(sample_block, true); - - if (prewhere_info->remove_prewhere_column) - sample_block.erase(prewhere_info->prewhere_column_name); -*/ + if (step.remove_column) + sample_block.erase(step.column_name); } } @@ -998,16 +981,14 @@ void MergeTreeRangeReader::fillPartOffsetColumn(ReadResult & result, UInt64 lead result.columns.emplace_back(std::move(column)); } -Columns MergeTreeRangeReader::continueReadingChain(ReadResult & result, size_t & num_rows) +Columns MergeTreeRangeReader::continueReadingChain(const ReadResult & result, size_t & num_rows) { Columns columns; num_rows = 0; -/////////////// -// HACK!!: no columns need to be read at this step? (only more filtering) + /// No columns need to be read at this step? 
(only more filtering) if (merge_tree_reader->getColumns().empty()) return columns; -/////////////// if (result.rowsPerGranule().empty()) { @@ -1044,8 +1025,11 @@ Columns MergeTreeRangeReader::continueReadingChain(ReadResult & result, size_t & stream.skip(result.numRowsToSkipInLastGranule()); num_rows += stream.finalize(columns); - - // TODO: here we can verify that stream and prev_reader->stream are at exactly same offset + /// verify that stream and prev_reader->stream are at exactly same offset + if (stream.currentPartOffset() != prev_reader->stream.currentPartOffset()) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Current step stream read position {} doesn't match previous step read read position {}", + stream.currentPartOffset(), prev_reader->stream.currentPartOffset()); /// added_rows may be zero if all columns were read in prewhere and it's ok. if (num_rows && num_rows != result.totalRowsPerGranule()) @@ -1125,15 +1109,17 @@ void MergeTreeRangeReader::executePrewhereActionsAndFilterColumns(ReadResult & r const auto & header = merge_tree_reader->getColumns(); size_t num_columns = header.size(); - // TODO: properly check that we have columns from previous steps and newly read required columns + /// Check that we have columns from previous steps and newly read required columns if (result.columns.size() < num_columns + non_const_virtual_column_names.size()) - throw Exception("Invalid number of columns passed to MergeTreeRangeReader. " - "Expected " + toString(num_columns) + ", " - "got " + toString(result.columns.size()), ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Invalid number of columns passed to MergeTreeRangeReader. Expected {}, got {}", + num_columns, result.columns.size()); - ColumnPtr current_filter; - ColumnPtr filter; -// ColumnPtr row_level_filter; + /// This filter has the size of total_rows_per granule. It is applied after reading contiguous chunks from + /// the start of each granule. 
+ ColumnPtr combined_filter; + /// Filter computed at the current step. Its size is equal to num_rows which is <= total_rows_per_granule + ColumnPtr current_step_filter; size_t prewhere_column_pos; { @@ -1150,12 +1136,8 @@ void MergeTreeRangeReader::executePrewhereActionsAndFilterColumns(ReadResult & r } } -// for (auto name_and_type = header.begin(); pos < num_columns; ++pos, ++name_and_type) -// block.insert({result.columns[pos], name_and_type->type, name_and_type->name}); - for (const auto & name_and_type : header) { - block.insert({result.columns[pos], name_and_type.type, name_and_type.name}); - ++pos; - } + for (auto name_and_type = header.begin(); name_and_type != header.end() && pos < result.columns.size(); ++pos, ++name_and_type) + block.insert({result.columns[pos], name_and_type->type, name_and_type->name}); for (const auto & column_name : non_const_virtual_column_names) { @@ -1163,7 +1145,14 @@ void MergeTreeRangeReader::executePrewhereActionsAndFilterColumns(ReadResult & r continue; if (column_name == "_part_offset") + { + if (pos >= result.columns.size()) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Invalid number of columns passed to MergeTreeRangeReader. Expected {}, got {}", + num_columns, result.columns.size()); + block.insert({result.columns[pos], std::make_shared(), column_name}); + } else throw Exception("Unexpected non-const virtual column: " + column_name, ErrorCodes::LOGICAL_ERROR); ++pos; @@ -1172,21 +1161,6 @@ void MergeTreeRangeReader::executePrewhereActionsAndFilterColumns(ReadResult & r /// Columns might be projected out. We need to store them here so that default columns can be evaluated later. 
result.block_before_prewhere = block; -/* if (prewhere_info->row_level_filter) - { - prewhere_info->row_level_filter->execute(block); - auto row_level_filter_pos = block.getPositionByName(prewhere_info->row_level_column_name); - row_level_filter = block.getByPosition(row_level_filter_pos).column; - block.erase(row_level_filter_pos); - - auto columns = block.getColumns(); - filterColumns(columns, row_level_filter); - if (columns.empty()) - block = block.cloneEmpty(); - else - block.setColumns(columns); - } -*/ if (prewhere_info->actions) prewhere_info->actions->execute(block); @@ -1197,40 +1171,27 @@ void MergeTreeRangeReader::executePrewhereActionsAndFilterColumns(ReadResult & r for (auto & col : block) result.columns.emplace_back(std::move(col.column)); - current_filter.swap(result.columns[prewhere_column_pos]); - filter = current_filter; - - + current_step_filter.swap(result.columns[prewhere_column_pos]); + combined_filter = current_step_filter; } if (result.getFilter()) { ColumnPtr prev_filter = result.getFilterHolder(); - filter = combineFilters(prev_filter, std::move(filter)); - -// /// TODO: implement for prewhere chain. -// /// In order to do it we need combine filter and result.filter, where filter filters only '1' in result.filter. 
-// throw Exception("MergeTreeRangeReader chain with several prewhere actions in not implemented.", -// ErrorCodes::LOGICAL_ERROR); + combined_filter = combineFilters(prev_filter, std::move(combined_filter)); } -// if (filter && row_level_filter) -// { -// row_level_filter = combineFilters(std::move(row_level_filter), filter); -// result.setFilter(row_level_filter); -// } -// else - result.setFilter(filter); + result.setFilter(combined_filter); /// If there is a WHERE, we filter in there, and only optimize IO and shrink columns here if (!last_reader_in_chain) - result.optimize(merge_tree_reader->canReadIncompleteGranules(), true); // TODO: prewhere_info->row_level_filter == nullptr); + result.optimize(merge_tree_reader->canReadIncompleteGranules(), true); /// If we read nothing or filter gets optimized to nothing if (result.totalRowsPerGranule() == 0) result.setFilterConstFalse(); /// If we need to filter in PREWHERE - else if (prewhere_info->need_filter || result.need_filter)// || prewhere_info->row_level_filter) + else if (prewhere_info->need_filter || result.need_filter) { /// If there is a filter and without optimized if (result.getFilter() && last_reader_in_chain) @@ -1250,22 +1211,7 @@ void MergeTreeRangeReader::executePrewhereActionsAndFilterColumns(ReadResult & r /// filter might be shrunk while columns not const auto * result_filter = result.getFilterOriginal(); -// if (row_level_filter) -// filterColumns(result.columns, filter); -// else - // filterColumns(result.columns, result_filter->getData()); - - - { - FilterDescription current_filter_descr(*current_filter); - // TODO: need to filter by current filter column that has num_rows size, not the original size - - // TODO: properly handle const true and const false cases - if (current_filter_descr.countBytesInFilter() == 0) - result.columns.clear(); - else if (current_filter_descr.data) - filterColumns(result.columns, *current_filter_descr.data); - } + filterColumns(result.columns, current_step_filter); 
result.need_filter = true; @@ -1313,7 +1259,7 @@ void MergeTreeRangeReader::executePrewhereActionsAndFilterColumns(ReadResult & r std::string PrewhereExprInfo::dump() const { - std::ostringstream s; + WriteBufferFromOwnString s; for (size_t i = 0; i < steps.size(); ++i) { diff --git a/src/Storages/MergeTree/MergeTreeRangeReader.h b/src/Storages/MergeTree/MergeTreeRangeReader.h index 15712a77797..667e8175089 100644 --- a/src/Storages/MergeTree/MergeTreeRangeReader.h +++ b/src/Storages/MergeTree/MergeTreeRangeReader.h @@ -18,8 +18,7 @@ using PrewhereInfoPtr = std::shared_ptr; class ExpressionActions; using ExpressionActionsPtr = std::shared_ptr; -//* -struct PrewhereExprStep +struct PrewhereExprStep { ExpressionActionsPtr actions; String column_name; @@ -30,42 +29,11 @@ struct PrewhereExprStep /// The same as PrewhereInfo, but with ExpressionActions instead of ActionsDAG struct PrewhereExprInfo { -// /// Actions for row level security filter. Applied separately before prewhere_actions. -// /// This actions are separate because prewhere condition should not be executed over filtered rows. -// ExpressionActionsPtr row_level_filter; -// /// Actions which are executed on block in order to get filter column for prewhere step. -// ExpressionActionsPtr prewhere_actions; -// String row_level_column_name; -// String prewhere_column_name; -// bool remove_prewhere_column = false; -// bool need_filter = false; - std::vector steps; -///// PrewhereExprStep deleted_row_filter; -///// PrewhereExprStep row_level_filter; -///// PrewhereExprStep prewhere; - std::string dump() const; }; -/*/ - -/// The same as PrewhereInfo, but with ExpressionActions instead of ActionsDAG -struct PrewhereExprInfo -{ - /// Actions for row level security filter. Applied separately before prewhere_actions. - /// This actions are separate because prewhere condition should not be executed over filtered rows. 
- ExpressionActionsPtr row_level_filter; - /// Actions which are executed on block in order to get filter column for prewhere step. - ExpressionActionsPtr prewhere_actions; - String row_level_column_name; - String prewhere_column_name; - bool remove_prewhere_column = false; - bool need_filter = false; -}; -//*/ - /// MergeTreeReader iterator which allows sequential reading for arbitrary number of rows between pairs of marks in the same part. /// Stores reading state, which can be inside granule. Can skip rows in current granule and start reading from next mark. /// Used generally for reading number of rows less than index granularity to decrease cache misses for fat blocks. @@ -278,7 +246,7 @@ public: private: ReadResult startReadingChain(size_t max_rows, MarkRanges & ranges); - Columns continueReadingChain(ReadResult & result, size_t & num_rows); + Columns continueReadingChain(const ReadResult & result, size_t & num_rows); void executePrewhereActionsAndFilterColumns(ReadResult & result); void fillPartOffsetColumn(ReadResult & result, UInt64 leading_begin_part_offset, UInt64 leading_end_part_offset); diff --git a/src/Storages/MergeTree/MergeTreeReaderWide.cpp b/src/Storages/MergeTree/MergeTreeReaderWide.cpp index 7517463e8a8..07fe187332e 100644 --- a/src/Storages/MergeTree/MergeTreeReaderWide.cpp +++ b/src/Storages/MergeTree/MergeTreeReaderWide.cpp @@ -70,7 +70,6 @@ size_t MergeTreeReaderWide::readRows( size_t num_columns = res_columns.size(); checkNumberOfColumns(num_columns); - /// TODO: is this ok to request no columns? 
if (num_columns == 0) return max_rows_to_read; diff --git a/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp b/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp index eb4746cbf04..aa6c457f412 100644 --- a/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp +++ b/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp @@ -50,9 +50,6 @@ MergeTreeSelectProcessor::MergeTreeSelectProcessor( void MergeTreeSelectProcessor::initializeReaders() { -pre_reader_for_step.clear(); // is it being reused??? - - task_columns = getReadTaskColumns( storage, storage_snapshot, data_part, required_columns, prewhere_info, /*with_subcolumns=*/ true); @@ -69,6 +66,8 @@ pre_reader_for_step.clear(); // is it being reused??? reader = data_part->getReader(task_columns.columns, storage_snapshot->getMetadataForQuery(), all_mark_ranges, owned_uncompressed_cache.get(), owned_mark_cache.get(), reader_settings, {}, {}); + pre_reader_for_step.clear(); + if (prewhere_info) { for (const auto & pre_columns_for_step : task_columns.pre_columns) diff --git a/src/Storages/MergeTree/MergeTreeThreadSelectProcessor.cpp b/src/Storages/MergeTree/MergeTreeThreadSelectProcessor.cpp index 47615ae6e3b..574ce4479f2 100644 --- a/src/Storages/MergeTree/MergeTreeThreadSelectProcessor.cpp +++ b/src/Storages/MergeTree/MergeTreeThreadSelectProcessor.cpp @@ -105,9 +105,6 @@ void MergeTreeThreadSelectProcessor::finalizeNewTask() auto profile_callback = [this](ReadBufferFromFileBase::ProfileInfo info_) { pool->profileFeedback(info_); }; const auto & metadata_snapshot = storage_snapshot->metadata; -//std::cerr << "==============TASK:==============\n" << task->task_columns.dump() << "\n"; -//std::cerr << "pre_reader_for_step.size() " << pre_reader_for_step.size() << "\n\n"; - if (!reader) { if (use_uncompressed_cache) @@ -118,8 +115,7 @@ void MergeTreeThreadSelectProcessor::finalizeNewTask() owned_uncompressed_cache.get(), owned_mark_cache.get(), reader_settings, IMergeTreeReader::ValueSizeMap{}, profile_callback); 
-pre_reader_for_step.clear(); // is it being reused??? - + pre_reader_for_step.clear(); if (prewhere_info) { for (const auto & pre_columns_per_step : task->task_columns.pre_columns) @@ -140,8 +136,7 @@ pre_reader_for_step.clear(); // is it being reused??? owned_uncompressed_cache.get(), owned_mark_cache.get(), reader_settings, reader->getAvgValueSizeHints(), profile_callback); -pre_reader_for_step.clear(); // is it being reused??? - + pre_reader_for_step.clear(); if (prewhere_info) { for (const auto & pre_columns_per_step : task->task_columns.pre_columns) diff --git a/src/Storages/SelectQueryInfo.h b/src/Storages/SelectQueryInfo.h index 23b592ce7ac..bdb4c392c48 100644 --- a/src/Storages/SelectQueryInfo.h +++ b/src/Storages/SelectQueryInfo.h @@ -47,42 +47,6 @@ using MergeTreeDataSelectAnalysisResultPtr = std::shared_ptr; -/* -struct PrewhereStep -{ - ActionsDAGPtr prewhere_actions; - String prewhere_column_name; - bool remove_prewhere_column = false; - bool need_filter = false; - - explicit PrewhereStep(ActionsDAGPtr prewhere_actions_, String prewhere_column_name_) - : prewhere_actions(std::move(prewhere_actions_)), prewhere_column_name(std::move(prewhere_column_name_)) {} - - std::string dump() const; -}; - -struct PrewhereInfo -{ -// /// Actions for row level security filter. Applied separately before prewhere_actions. -// /// This actions are separate because prewhere condition should not be executed over filtered rows. -// ActionsDAGPtr row_level_filter; -// /// Actions which are executed on block in order to get filter column for prewhere step. 
-// ActionsDAGPtr prewhere_actions; -// String row_level_column_name; -// String prewhere_column_name; -// bool remove_prewhere_column = false; -// bool need_filter = false; - - std::vector steps; - - PrewhereInfo() = default; -// explicit PrewhereInfo(ActionsDAGPtr prewhere_actions_, String prewhere_column_name_) -// : prewhere_actions(std::move(prewhere_actions_)), prewhere_column_name(std::move(prewhere_column_name_)) {} - - std::string dump() const; -}; -/*/ - struct PrewhereInfo { /// Actions for row level security filter. Applied separately before prewhere_actions. @@ -101,7 +65,6 @@ struct PrewhereInfo std::string dump() const; }; -//*/ /// Helper struct to store all the information about the filter expression. struct FilterInfo From e5b55b965bc5fd9d241b05cd0f817727ea3ad28f Mon Sep 17 00:00:00 2001 From: Alexander Gololobov <440544+davenger@users.noreply.github.com> Date: Tue, 14 Jun 2022 17:03:30 +0200 Subject: [PATCH 034/101] Removed incorrect check --- src/Storages/MergeTree/MergeTreeRangeReader.cpp | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeRangeReader.cpp b/src/Storages/MergeTree/MergeTreeRangeReader.cpp index eaecac06118..b6bc3102c06 100644 --- a/src/Storages/MergeTree/MergeTreeRangeReader.cpp +++ b/src/Storages/MergeTree/MergeTreeRangeReader.cpp @@ -1025,12 +1025,6 @@ Columns MergeTreeRangeReader::continueReadingChain(const ReadResult & result, si stream.skip(result.numRowsToSkipInLastGranule()); num_rows += stream.finalize(columns); - /// verify that stream and prev_reader->stream are at exactly same offset - if (stream.currentPartOffset() != prev_reader->stream.currentPartOffset()) - throw Exception(ErrorCodes::LOGICAL_ERROR, - "Current step stream read position {} doesn't match previous step read read position {}", - stream.currentPartOffset(), prev_reader->stream.currentPartOffset()); - /// added_rows may be zero if all columns were read in prewhere and it's ok. 
if (num_rows && num_rows != result.totalRowsPerGranule()) throw Exception("RangeReader read " + toString(num_rows) + " rows, but " From 8138a3a36e138e423333abc650f757cad9d05d0f Mon Sep 17 00:00:00 2001 From: Alexander Gololobov <440544+davenger@users.noreply.github.com> Date: Wed, 22 Jun 2022 17:17:25 +0200 Subject: [PATCH 035/101] Cleanups based on review comments --- src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp | 8 ++++---- src/Storages/MergeTree/MergeTreeBlockReadUtils.h | 4 +++- src/Storages/MergeTree/MergeTreeRangeReader.h | 4 +++- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp b/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp index 0d440ea7a8a..31ca533a9ec 100644 --- a/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp +++ b/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp @@ -231,10 +231,10 @@ void MergeTreeBaseSelectProcessor::initializeRangeReaders(MergeTreeReadTask & cu for (size_t i = 0; i < prewhere_actions->steps.size(); ++i) { last_reader = reader->getColumns().empty() && (i + 1 == prewhere_actions->steps.size()); - current_task.pre_range_reader.push_back( + current_task.pre_range_readers.push_back( MergeTreeRangeReader(pre_reader_for_step[i].get(), prev_reader, &prewhere_actions->steps[i], last_reader, non_const_virtual_column_names)); - prev_reader = ¤t_task.pre_range_reader.back(); + prev_reader = ¤t_task.pre_range_readers.back(); } } @@ -246,8 +246,8 @@ void MergeTreeBaseSelectProcessor::initializeRangeReaders(MergeTreeReadTask & cu else { /// If all columns are read by pre_range_readers than move last pre_range_reader into range_reader - current_task.range_reader = std::move(current_task.pre_range_reader.back()); - current_task.pre_range_reader.pop_back(); + current_task.range_reader = std::move(current_task.pre_range_readers.back()); + current_task.pre_range_readers.pop_back(); } } diff --git a/src/Storages/MergeTree/MergeTreeBlockReadUtils.h 
b/src/Storages/MergeTree/MergeTreeBlockReadUtils.h index 0a9a307fec5..f9f82dbd1f2 100644 --- a/src/Storages/MergeTree/MergeTreeBlockReadUtils.h +++ b/src/Storages/MergeTree/MergeTreeBlockReadUtils.h @@ -61,7 +61,9 @@ struct MergeTreeReadTask MergeTreeBlockSizePredictorPtr size_predictor; /// Used to save current range processing status MergeTreeRangeReader range_reader; - std::deque pre_range_reader; + /// Range readers for multiple filtering steps: row level security, PREWHERE etc. + /// NOTE: we take references to elements and push_back new elements, that's why it is a deque but noit a vector + std::deque pre_range_readers; bool isFinished() const { return mark_ranges.empty() && range_reader.isCurrentRangeFinished(); } diff --git a/src/Storages/MergeTree/MergeTreeRangeReader.h b/src/Storages/MergeTree/MergeTreeRangeReader.h index 667e8175089..b4fb8913122 100644 --- a/src/Storages/MergeTree/MergeTreeRangeReader.h +++ b/src/Storages/MergeTree/MergeTreeRangeReader.h @@ -209,7 +209,9 @@ public: size_t countBytesInResultFilter(const IColumn::Filter & filter); - bool need_filter = false; /// TODO: what exactly does it mean? 
+ /// If this flag is false than filtering form PREWHERE can be delayed and done in WHERE + /// to reduce memory copies and applying heavy filters multiple times + bool need_filter = false; Block block_before_prewhere; From 0ee47363d4e1e78ca5595f2975c28209e827638f Mon Sep 17 00:00:00 2001 From: Alexander Gololobov <440544+davenger@users.noreply.github.com> Date: Wed, 22 Jun 2022 18:13:46 +0200 Subject: [PATCH 036/101] Fixed includes --- src/Storages/MergeTree/MergeTreeRangeReader.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeRangeReader.cpp b/src/Storages/MergeTree/MergeTreeRangeReader.cpp index b6bc3102c06..6e14e9c7aa9 100644 --- a/src/Storages/MergeTree/MergeTreeRangeReader.cpp +++ b/src/Storages/MergeTree/MergeTreeRangeReader.cpp @@ -3,8 +3,8 @@ #include #include #include -#include "IO/WriteBufferFromString.h" -#include "IO/Operators.h" +#include +#include #include #include #include From 267025c35bcd48070bfe4f924c9761ca0e1bf0a4 Mon Sep 17 00:00:00 2001 From: zvonand Date: Thu, 23 Jun 2022 16:51:15 +0500 Subject: [PATCH 037/101] update base-x cmakelists --- contrib/base-x-cmake/CMakeLists.txt | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/contrib/base-x-cmake/CMakeLists.txt b/contrib/base-x-cmake/CMakeLists.txt index ab5696c9fb6..b0c11a176c6 100644 --- a/contrib/base-x-cmake/CMakeLists.txt +++ b/contrib/base-x-cmake/CMakeLists.txt @@ -12,9 +12,8 @@ set (SRCS ${LIBRARY_DIR}/uinteger_t.hh ) -add_library(_base-x ${SRCS}) - -target_include_directories(_base-x SYSTEM BEFORE PUBLIC ${LIBRARY_DIR}) +add_library(_base-x INTERFACE) +target_include_directories(_base-x SYSTEM BEFORE INTERFACE "${ClickHouse_SOURCE_DIR}/contrib/base-x") if (XCODE OR XCODE_VERSION) # https://gitlab.kitware.com/cmake/cmake/issues/17457 From e33324bd358533aeafad16e7f1675a1d148b1060 Mon Sep 17 00:00:00 2001 From: zvonand Date: Thu, 23 Jun 2022 16:56:28 +0500 Subject: [PATCH 038/101] fix docs --- 
docs/en/sql-reference/functions/string-functions.md | 2 +- docs/ru/sql-reference/functions/string-functions.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/en/sql-reference/functions/string-functions.md b/docs/en/sql-reference/functions/string-functions.md index 7c6ae903acf..61efc2cfcdb 100644 --- a/docs/en/sql-reference/functions/string-functions.md +++ b/docs/en/sql-reference/functions/string-functions.md @@ -509,7 +509,7 @@ base58Decode(encoded[, alphabet_name]) - `decoded` — [String](../../sql-reference/data-types/string.md) column or constant. - `encoded` — [String](../../sql-reference/data-types/string.md) column or constant. If the string is not a valid base58-encoded value, an exception is thrown. -- `alphabet_name` — String constant. Specifies alphabet used for encoding. Possible values: `gmp`, `bitcoin`, `ripple`, `flickr`. Default: `gmp`. +- `alphabet_name` — String constant. Specifies alphabet used for encoding. Possible values: `gmp`, `bitcoin`, `ripple`, `flickr`. Default: `bitcoin`. **Returned value** diff --git a/docs/ru/sql-reference/functions/string-functions.md b/docs/ru/sql-reference/functions/string-functions.md index e85a97e0099..1acf5ec58b2 100644 --- a/docs/ru/sql-reference/functions/string-functions.md +++ b/docs/ru/sql-reference/functions/string-functions.md @@ -505,7 +505,7 @@ base58Decode(encoded[, alphabet_name]) - `decoded` — Колонка или строка типа [String](../../sql-reference/data-types/string.md). - `encoded` — Колонка или строка типа [String](../../sql-reference/data-types/string.md). Если входная строка не является корректным кодом для какой-либо другой строки, возникнет исключение `1001`. -- `alphabet_name` — Строковая константа. Указывает алфавит, для которого необходимо получить код. Может принимать одно из следующих значений: `gmp`, `bitcoin`, `ripple`, `flickr`. По умолчанию: `gmp`. +- `alphabet_name` — Строковая константа. Указывает алфавит, для которого необходимо получить код. 
Может принимать одно из следующих значений: `gmp`, `bitcoin`, `ripple`, `flickr`. По умолчанию: `bitcoin`. **Возвращаемое значение** From 468c98ff42f75c9b6af2d87662e3d2d087a3778c Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 23 Jun 2022 17:46:27 +0200 Subject: [PATCH 039/101] Better --- src/Access/Common/AccessType.h | 1 + src/Common/FileCacheFactory.cpp | 47 +++++++++---------- src/Common/FileCacheFactory.h | 13 +++-- src/Interpreters/AsynchronousMetrics.cpp | 4 +- .../InterpreterDescribeCacheQuery.cpp | 4 +- .../InterpreterShowTablesQuery.cpp | 23 +++++++++ src/Interpreters/InterpreterSystemQuery.cpp | 2 +- src/Parsers/ASTShowTablesQuery.cpp | 6 +++ src/Parsers/ASTShowTablesQuery.h | 3 +- src/Parsers/ParserShowTablesQuery.cpp | 5 ++ .../System/StorageSystemFilesystemCache.cpp | 2 +- 11 files changed, 73 insertions(+), 37 deletions(-) diff --git a/src/Access/Common/AccessType.h b/src/Access/Common/AccessType.h index 41739406de4..10db8907f8a 100644 --- a/src/Access/Common/AccessType.h +++ b/src/Access/Common/AccessType.h @@ -25,6 +25,7 @@ enum class AccessType M(SHOW_DICTIONARIES, "", DICTIONARY, SHOW) /* allows to execute SHOW DICTIONARIES, SHOW CREATE DICTIONARY, EXISTS ; implicitly enabled by any grant on the dictionary */\ M(SHOW, "", GROUP, ALL) /* allows to execute SHOW, USE, EXISTS, CHECK, DESCRIBE */\ + M(SHOW_CACHES, "", GROUP, ALL) \ \ M(SELECT, "", COLUMN, ALL) \ M(INSERT, "", COLUMN, ALL) \ diff --git a/src/Common/FileCacheFactory.cpp b/src/Common/FileCacheFactory.cpp index b2dc2739d8a..259c1d3f48e 100644 --- a/src/Common/FileCacheFactory.cpp +++ b/src/Common/FileCacheFactory.cpp @@ -24,31 +24,21 @@ FileCacheFactory::CacheByBasePath FileCacheFactory::getAll() const FileCacheSettings & FileCacheFactory::getSettings(const std::string & cache_base_path) { std::lock_guard lock(mutex); - - auto * cache_data = getImpl(cache_base_path, lock); - if (cache_data) - return cache_data->settings; - - throw Exception(ErrorCodes::BAD_ARGUMENTS, "No cache found by 
path: {}", cache_base_path); -} - -FileCacheFactory::FileCacheData * FileCacheFactory::getImpl(const std::string & cache_base_path, std::lock_guard &) -{ auto it = caches_by_path.find(cache_base_path); if (it == caches_by_path.end()) - return nullptr; - return &it->second; + throw Exception(ErrorCodes::BAD_ARGUMENTS, "No cache found by path: {}", cache_base_path); + return it->second->settings; + } FileCachePtr FileCacheFactory::get(const std::string & cache_base_path) { std::lock_guard lock(mutex); + auto it = caches_by_path.find(cache_base_path); + if (it == caches_by_path.end()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "No cache found by path: {}", cache_base_path); + return it->second->cache; - auto * cache_data = getImpl(cache_base_path, lock); - if (cache_data) - return cache_data->cache; - - throw Exception(ErrorCodes::BAD_ARGUMENTS, "No cache found by path: {}", cache_base_path); } FileCachePtr FileCacheFactory::getOrCreate( @@ -56,33 +46,38 @@ FileCachePtr FileCacheFactory::getOrCreate( { std::lock_guard lock(mutex); - auto * cache_data = getImpl(cache_base_path, lock); - if (cache_data) + auto it = caches_by_path.find(cache_base_path); + if (it != caches_by_path.end()) { - registerCacheByName(name, *cache_data); - return cache_data->cache; + caches_by_name.emplace(name, it->second); + return it->second->cache; } auto cache = std::make_shared(cache_base_path, file_cache_settings); FileCacheData result{cache, file_cache_settings}; - registerCacheByName(name, result); - caches_by_path.emplace(cache_base_path, result); + auto cache_it = caches.insert(caches.end(), std::move(result)); + caches_by_name.emplace(name, cache_it); + caches_by_path.emplace(cache_base_path, cache_it); return cache; } FileCacheFactory::FileCacheData FileCacheFactory::getByName(const std::string & name) { + std::lock_guard lock(mutex); + auto it = caches_by_name.find(name); if (it == caches_by_name.end()) throw Exception(ErrorCodes::BAD_ARGUMENTS, "No cache found by name: {}", 
name); - return it->second; + + return *it->second; } -void FileCacheFactory::registerCacheByName(const std::string & name, const FileCacheData & cache_data) +FileCacheFactory::CacheByName FileCacheFactory::getAllByName() { - caches_by_name.emplace(std::make_pair(name, cache_data)); + std::lock_guard lock(mutex); + return caches_by_name; } } diff --git a/src/Common/FileCacheFactory.h b/src/Common/FileCacheFactory.h index 007d77643b6..0d99da0343c 100644 --- a/src/Common/FileCacheFactory.h +++ b/src/Common/FileCacheFactory.h @@ -6,6 +6,7 @@ #include #include #include +#include namespace DB { @@ -24,8 +25,9 @@ public: FileCacheData(FileCachePtr cache_, const FileCacheSettings & settings_) : cache(cache_), settings(settings_) {} }; - using CacheByBasePath = std::unordered_map; - using CacheByName = std::unordered_map; + using Caches = std::list; + using CacheByBasePath = std::unordered_map; + using CacheByName = std::unordered_map; static FileCacheFactory & instance(); @@ -39,11 +41,12 @@ public: FileCacheData getByName(const std::string & name); -private: - FileCacheData * getImpl(const std::string & cache_base_path, std::lock_guard &); - void registerCacheByName(const std::string & name, const FileCacheData & cache_data); + CacheByName getAllByName(); +private: std::mutex mutex; + Caches caches; + CacheByBasePath caches_by_path; CacheByName caches_by_name; }; diff --git a/src/Interpreters/AsynchronousMetrics.cpp b/src/Interpreters/AsynchronousMetrics.cpp index 4ac5acfd60f..37ed418ec2a 100644 --- a/src/Interpreters/AsynchronousMetrics.cpp +++ b/src/Interpreters/AsynchronousMetrics.cpp @@ -615,8 +615,8 @@ void AsynchronousMetrics::update(std::chrono::system_clock::time_point update_ti auto caches = FileCacheFactory::instance().getAll(); for (const auto & [_, cache_data] : caches) { - new_values["FilesystemCacheBytes"] = cache_data.cache->getUsedCacheSize(); - new_values["FilesystemCacheFiles"] = cache_data.cache->getFileSegmentsNum(); + 
new_values["FilesystemCacheBytes"] = cache_data->cache->getUsedCacheSize(); + new_values["FilesystemCacheFiles"] = cache_data->cache->getFileSegmentsNum(); } } diff --git a/src/Interpreters/InterpreterDescribeCacheQuery.cpp b/src/Interpreters/InterpreterDescribeCacheQuery.cpp index 554153922b5..dd6df26c6af 100644 --- a/src/Interpreters/InterpreterDescribeCacheQuery.cpp +++ b/src/Interpreters/InterpreterDescribeCacheQuery.cpp @@ -1,4 +1,5 @@ #include +#include #include #include #include @@ -6,6 +7,7 @@ #include #include #include +#include #include namespace DB @@ -28,7 +30,7 @@ static Block getSampleBlock() BlockIO InterpreterDescribeCacheQuery::execute() { - ColumnsDescription columns; + getContext()->checkAccess(AccessType::SHOW_CACHES); const auto & ast = query_ptr->as(); Block sample_block = getSampleBlock(); diff --git a/src/Interpreters/InterpreterShowTablesQuery.cpp b/src/Interpreters/InterpreterShowTablesQuery.cpp index 609df1404ca..d623b6c71d6 100644 --- a/src/Interpreters/InterpreterShowTablesQuery.cpp +++ b/src/Interpreters/InterpreterShowTablesQuery.cpp @@ -5,6 +5,11 @@ #include #include #include +#include +#include +#include +#include +#include #include #include @@ -142,6 +147,24 @@ String InterpreterShowTablesQuery::getRewrittenQuery() BlockIO InterpreterShowTablesQuery::execute() { + const auto & query = query_ptr->as(); + if (query.caches) + { + getContext()->checkAccess(AccessType::SHOW_CACHES); + + Block sample_block{ColumnWithTypeAndName(std::make_shared(), "Caches")}; + MutableColumns res_columns = sample_block.cloneEmptyColumns(); + auto caches = FileCacheFactory::instance().getAllByName(); + for (const auto & [name, _] : caches) + res_columns[0]->insert(name); + BlockIO res; + size_t num_rows = res_columns[0]->size(); + auto source = std::make_shared(sample_block, Chunk(std::move(res_columns), num_rows)); + res.pipeline = QueryPipeline(std::move(source)); + + return res; + } + return executeQuery(getRewrittenQuery(), getContext(), true); } diff 
--git a/src/Interpreters/InterpreterSystemQuery.cpp b/src/Interpreters/InterpreterSystemQuery.cpp index 9196a5222a2..67e2c8b6ecd 100644 --- a/src/Interpreters/InterpreterSystemQuery.cpp +++ b/src/Interpreters/InterpreterSystemQuery.cpp @@ -316,7 +316,7 @@ BlockIO InterpreterSystemQuery::execute() { auto caches = FileCacheFactory::instance().getAll(); for (const auto & [_, cache_data] : caches) - cache_data.cache->removeIfReleasable(/* remove_persistent_files */false); + cache_data->cache->removeIfReleasable(/* remove_persistent_files */false); } else { diff --git a/src/Parsers/ASTShowTablesQuery.cpp b/src/Parsers/ASTShowTablesQuery.cpp index 61d68c4a273..42cc0077efa 100644 --- a/src/Parsers/ASTShowTablesQuery.cpp +++ b/src/Parsers/ASTShowTablesQuery.cpp @@ -55,6 +55,12 @@ void ASTShowTablesQuery::formatQueryImpl(const FormatSettings & settings, Format settings.ostr << (settings.hilite ? hilite_keyword : "") << "SHOW CLUSTER" << (settings.hilite ? hilite_none : ""); settings.ostr << " " << backQuoteIfNeed(cluster_str); } + else if (caches) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << "SHOW CACHES" << (settings.hilite ? hilite_none : ""); + formatLike(settings); + formatLimit(settings, state, frame); + } else if (m_settings) { settings.ostr << (settings.hilite ? hilite_keyword : "") << "SHOW " << (changed ? 
"CHANGED " : "") << "SETTINGS" << diff --git a/src/Parsers/ASTShowTablesQuery.h b/src/Parsers/ASTShowTablesQuery.h index 57383dff66f..04cf9d6645a 100644 --- a/src/Parsers/ASTShowTablesQuery.h +++ b/src/Parsers/ASTShowTablesQuery.h @@ -9,7 +9,7 @@ namespace DB { -/** Query SHOW TABLES or SHOW DATABASES or SHOW CLUSTERS +/** Query SHOW TABLES or SHOW DATABASES or SHOW CLUSTERS or SHOW CACHES */ class ASTShowTablesQuery : public ASTQueryWithOutput { @@ -21,6 +21,7 @@ public: bool m_settings{false}; bool changed{false}; bool temporary{false}; + bool caches{false}; String cluster_str; String from; diff --git a/src/Parsers/ParserShowTablesQuery.cpp b/src/Parsers/ParserShowTablesQuery.cpp index b5c8687321e..46c783b8271 100644 --- a/src/Parsers/ParserShowTablesQuery.cpp +++ b/src/Parsers/ParserShowTablesQuery.cpp @@ -24,6 +24,7 @@ bool ParserShowTablesQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec ParserKeyword s_clusters("CLUSTERS"); ParserKeyword s_cluster("CLUSTER"); ParserKeyword s_dictionaries("DICTIONARIES"); + ParserKeyword s_caches("CACHES"); ParserKeyword s_settings("SETTINGS"); ParserKeyword s_changed("CHANGED"); ParserKeyword s_from("FROM"); @@ -91,6 +92,10 @@ bool ParserShowTablesQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec return false; } } + else if (s_caches.ignore(pos, expected)) + { + query->caches = true; + } else if (s_cluster.ignore(pos, expected)) { query->cluster = true; diff --git a/src/Storages/System/StorageSystemFilesystemCache.cpp b/src/Storages/System/StorageSystemFilesystemCache.cpp index 4b76163363a..2baddadec90 100644 --- a/src/Storages/System/StorageSystemFilesystemCache.cpp +++ b/src/Storages/System/StorageSystemFilesystemCache.cpp @@ -38,7 +38,7 @@ void StorageSystemFilesystemCache::fillData(MutableColumns & res_columns, Contex for (const auto & [cache_base_path, cache_data] : caches) { - const auto & cache = cache_data.cache; + const auto & cache = cache_data->cache; auto file_segments = cache->getSnapshot(); 
for (const auto & file_segment : file_segments) From 94a9af41910ca8c2d932846765d766772e5bc21c Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 23 Jun 2022 17:54:11 +0200 Subject: [PATCH 040/101] Add test --- tests/queries/0_stateless/02344_describe_cache.reference | 1 + tests/queries/0_stateless/02344_describe_cache.sql | 1 + 2 files changed, 2 insertions(+) create mode 100644 tests/queries/0_stateless/02344_describe_cache.reference create mode 100644 tests/queries/0_stateless/02344_describe_cache.sql diff --git a/tests/queries/0_stateless/02344_describe_cache.reference b/tests/queries/0_stateless/02344_describe_cache.reference new file mode 100644 index 00000000000..affee889537 --- /dev/null +++ b/tests/queries/0_stateless/02344_describe_cache.reference @@ -0,0 +1 @@ +22548578304 1048576 104857600 1 0 0 0 ./s3_cache/ diff --git a/tests/queries/0_stateless/02344_describe_cache.sql b/tests/queries/0_stateless/02344_describe_cache.sql new file mode 100644 index 00000000000..aa6a409cd49 --- /dev/null +++ b/tests/queries/0_stateless/02344_describe_cache.sql @@ -0,0 +1 @@ +DESCRIBE CACHE 's3disk'; From dfae6a5634f868d37889f671e2470950bb2a916f Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Thu, 23 Jun 2022 14:53:39 +0200 Subject: [PATCH 041/101] Upload logs for getting all tests command --- tests/integration/ci-runner.py | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/tests/integration/ci-runner.py b/tests/integration/ci-runner.py index e4bd1be9027..562497fe8b0 100755 --- a/tests/integration/ci-runner.py +++ b/tests/integration/ci-runner.py @@ -367,7 +367,7 @@ class ClickhouseIntegrationTestsRunner: def _get_all_tests(self, repo_path): image_cmd = self._get_runner_image_cmd(repo_path) out_file = "all_tests.txt" - out_file_full = "all_tests_full.txt" + out_file_full = os.path.join(self.result_path, "runner_get_all_tests.log") cmd = ( "cd {repo_path}/tests/integration && " "timeout -s 9 1h ./runner {runner_opts} {image_cmd} ' --setup-plan' " @@ -393,21 +393,16 @@ class ClickhouseIntegrationTestsRunner: not os.path.isfile(all_tests_file_path) or os.path.getsize(all_tests_file_path) == 0 ): - all_tests_full_file_path = ( - "{repo_path}/tests/integration/{out_file}".format( - repo_path=repo_path, out_file=out_file_full - ) - ) - if os.path.isfile(all_tests_full_file_path): + if os.path.isfile(out_file_full): # log runner output logging.info("runner output:") - with open(all_tests_full_file_path, "r") as all_tests_full_file: + with open(out_file_full, "r") as all_tests_full_file: for line in all_tests_full_file: line = line.rstrip() if line: logging.info("runner output: %s", line) else: - logging.info("runner output '%s' is empty", all_tests_full_file_path) + logging.info("runner output '%s' is empty", out_file_full) raise Exception( "There is something wrong with getting all tests list: file '{}' is empty or does not exist.".format( From fe0ab845f57a5501e78a5c78f9c0725d98987d80 Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 23 Jun 2022 20:59:43 +0200 Subject: [PATCH 042/101] Update tests --- tests/queries/0_stateless/01271_show_privileges.reference | 1 + 
.../0_stateless/02117_show_create_table_system.reference | 6 +++--- tests/queries/0_stateless/02344_describe_cache.sql | 2 ++ 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/tests/queries/0_stateless/01271_show_privileges.reference b/tests/queries/0_stateless/01271_show_privileges.reference index 93f93683fc4..37ca20b92a1 100644 --- a/tests/queries/0_stateless/01271_show_privileges.reference +++ b/tests/queries/0_stateless/01271_show_privileges.reference @@ -3,6 +3,7 @@ SHOW TABLES [] TABLE SHOW SHOW COLUMNS [] COLUMN SHOW SHOW DICTIONARIES [] DICTIONARY SHOW SHOW [] \N ALL +SHOW CACHES [] \N ALL SELECT [] COLUMN ALL INSERT [] COLUMN ALL ALTER UPDATE ['UPDATE'] COLUMN ALTER TABLE diff --git a/tests/queries/0_stateless/02117_show_create_table_system.reference b/tests/queries/0_stateless/02117_show_create_table_system.reference index 100e72d9a61..e4b73bb439e 100644 --- a/tests/queries/0_stateless/02117_show_create_table_system.reference +++ b/tests/queries/0_stateless/02117_show_create_table_system.reference @@ -277,7 +277,7 @@ CREATE TABLE system.grants ( `user_name` Nullable(String), `role_name` Nullable(String), - `access_type` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SELECT' = 5, 'INSERT' = 6, 'ALTER UPDATE' = 7, 'ALTER DELETE' = 8, 'ALTER ADD COLUMN' = 9, 'ALTER MODIFY COLUMN' = 10, 'ALTER DROP COLUMN' = 11, 'ALTER COMMENT COLUMN' = 12, 'ALTER CLEAR COLUMN' = 13, 'ALTER RENAME COLUMN' = 14, 'ALTER MATERIALIZE COLUMN' = 15, 'ALTER COLUMN' = 16, 'ALTER MODIFY COMMENT' = 17, 'ALTER ORDER BY' = 18, 'ALTER SAMPLE BY' = 19, 'ALTER ADD INDEX' = 20, 'ALTER DROP INDEX' = 21, 'ALTER MATERIALIZE INDEX' = 22, 'ALTER CLEAR INDEX' = 23, 'ALTER INDEX' = 24, 'ALTER ADD PROJECTION' = 25, 'ALTER DROP PROJECTION' = 26, 'ALTER MATERIALIZE PROJECTION' = 27, 'ALTER CLEAR PROJECTION' = 28, 'ALTER PROJECTION' = 29, 'ALTER ADD CONSTRAINT' = 30, 'ALTER DROP CONSTRAINT' = 31, 'ALTER CONSTRAINT' = 32, 'ALTER TTL' 
= 33, 'ALTER MATERIALIZE TTL' = 34, 'ALTER SETTINGS' = 35, 'ALTER MOVE PARTITION' = 36, 'ALTER FETCH PARTITION' = 37, 'ALTER FREEZE PARTITION' = 38, 'ALTER DATABASE SETTINGS' = 39, 'ALTER TABLE' = 40, 'ALTER DATABASE' = 41, 'ALTER VIEW REFRESH' = 42, 'ALTER VIEW MODIFY QUERY' = 43, 'ALTER VIEW' = 44, 'ALTER' = 45, 'CREATE DATABASE' = 46, 'CREATE TABLE' = 47, 'CREATE VIEW' = 48, 'CREATE DICTIONARY' = 49, 'CREATE TEMPORARY TABLE' = 50, 'CREATE FUNCTION' = 51, 'CREATE' = 52, 'DROP DATABASE' = 53, 'DROP TABLE' = 54, 'DROP VIEW' = 55, 'DROP DICTIONARY' = 56, 'DROP FUNCTION' = 57, 'DROP' = 58, 'TRUNCATE' = 59, 'OPTIMIZE' = 60, 'BACKUP' = 61, 'KILL QUERY' = 62, 'KILL TRANSACTION' = 63, 'MOVE PARTITION BETWEEN SHARDS' = 64, 'CREATE USER' = 65, 'ALTER USER' = 66, 'DROP USER' = 67, 'CREATE ROLE' = 68, 'ALTER ROLE' = 69, 'DROP ROLE' = 70, 'ROLE ADMIN' = 71, 'CREATE ROW POLICY' = 72, 'ALTER ROW POLICY' = 73, 'DROP ROW POLICY' = 74, 'CREATE QUOTA' = 75, 'ALTER QUOTA' = 76, 'DROP QUOTA' = 77, 'CREATE SETTINGS PROFILE' = 78, 'ALTER SETTINGS PROFILE' = 79, 'DROP SETTINGS PROFILE' = 80, 'SHOW USERS' = 81, 'SHOW ROLES' = 82, 'SHOW ROW POLICIES' = 83, 'SHOW QUOTAS' = 84, 'SHOW SETTINGS PROFILES' = 85, 'SHOW ACCESS' = 86, 'ACCESS MANAGEMENT' = 87, 'SYSTEM SHUTDOWN' = 88, 'SYSTEM DROP DNS CACHE' = 89, 'SYSTEM DROP MARK CACHE' = 90, 'SYSTEM DROP UNCOMPRESSED CACHE' = 91, 'SYSTEM DROP MMAP CACHE' = 92, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 93, 'SYSTEM DROP CACHE' = 94, 'SYSTEM RELOAD CONFIG' = 95, 'SYSTEM RELOAD SYMBOLS' = 96, 'SYSTEM RELOAD DICTIONARY' = 97, 'SYSTEM RELOAD MODEL' = 98, 'SYSTEM RELOAD FUNCTION' = 99, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 100, 'SYSTEM RELOAD' = 101, 'SYSTEM RESTART DISK' = 102, 'SYSTEM MERGES' = 103, 'SYSTEM TTL MERGES' = 104, 'SYSTEM FETCHES' = 105, 'SYSTEM MOVES' = 106, 'SYSTEM DISTRIBUTED SENDS' = 107, 'SYSTEM REPLICATED SENDS' = 108, 'SYSTEM SENDS' = 109, 'SYSTEM REPLICATION QUEUES' = 110, 'SYSTEM DROP REPLICA' = 111, 'SYSTEM SYNC REPLICA' = 
112, 'SYSTEM RESTART REPLICA' = 113, 'SYSTEM RESTORE REPLICA' = 114, 'SYSTEM SYNC DATABASE REPLICA' = 115, 'SYSTEM SYNC TRANSACTION LOG' = 116, 'SYSTEM FLUSH DISTRIBUTED' = 117, 'SYSTEM FLUSH LOGS' = 118, 'SYSTEM FLUSH' = 119, 'SYSTEM THREAD FUZZER' = 120, 'SYSTEM UNFREEZE' = 121, 'SYSTEM' = 122, 'dictGet' = 123, 'addressToLine' = 124, 'addressToLineWithInlines' = 125, 'addressToSymbol' = 126, 'demangle' = 127, 'INTROSPECTION' = 128, 'FILE' = 129, 'URL' = 130, 'REMOTE' = 131, 'MONGO' = 132, 'MEILISEARCH' = 133, 'MYSQL' = 134, 'POSTGRES' = 135, 'SQLITE' = 136, 'ODBC' = 137, 'JDBC' = 138, 'HDFS' = 139, 'S3' = 140, 'HIVE' = 141, 'SOURCES' = 142, 'CLUSTER' = 143, 'ALL' = 144, 'NONE' = 145), + `access_type` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER TABLE' = 41, 'ALTER DATABASE' = 42, 'ALTER VIEW REFRESH' = 43, 'ALTER VIEW MODIFY QUERY' = 44, 'ALTER VIEW' = 45, 'ALTER' = 46, 'CREATE DATABASE' = 47, 'CREATE TABLE' = 48, 'CREATE VIEW' = 49, 'CREATE DICTIONARY' = 50, 'CREATE TEMPORARY 
TABLE' = 51, 'CREATE FUNCTION' = 52, 'CREATE' = 53, 'DROP DATABASE' = 54, 'DROP TABLE' = 55, 'DROP VIEW' = 56, 'DROP DICTIONARY' = 57, 'DROP FUNCTION' = 58, 'DROP' = 59, 'TRUNCATE' = 60, 'OPTIMIZE' = 61, 'BACKUP' = 62, 'KILL QUERY' = 63, 'KILL TRANSACTION' = 64, 'MOVE PARTITION BETWEEN SHARDS' = 65, 'CREATE USER' = 66, 'ALTER USER' = 67, 'DROP USER' = 68, 'CREATE ROLE' = 69, 'ALTER ROLE' = 70, 'DROP ROLE' = 71, 'ROLE ADMIN' = 72, 'CREATE ROW POLICY' = 73, 'ALTER ROW POLICY' = 74, 'DROP ROW POLICY' = 75, 'CREATE QUOTA' = 76, 'ALTER QUOTA' = 77, 'DROP QUOTA' = 78, 'CREATE SETTINGS PROFILE' = 79, 'ALTER SETTINGS PROFILE' = 80, 'DROP SETTINGS PROFILE' = 81, 'SHOW USERS' = 82, 'SHOW ROLES' = 83, 'SHOW ROW POLICIES' = 84, 'SHOW QUOTAS' = 85, 'SHOW SETTINGS PROFILES' = 86, 'SHOW ACCESS' = 87, 'ACCESS MANAGEMENT' = 88, 'SYSTEM SHUTDOWN' = 89, 'SYSTEM DROP DNS CACHE' = 90, 'SYSTEM DROP MARK CACHE' = 91, 'SYSTEM DROP UNCOMPRESSED CACHE' = 92, 'SYSTEM DROP MMAP CACHE' = 93, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 94, 'SYSTEM DROP CACHE' = 95, 'SYSTEM RELOAD CONFIG' = 96, 'SYSTEM RELOAD SYMBOLS' = 97, 'SYSTEM RELOAD DICTIONARY' = 98, 'SYSTEM RELOAD MODEL' = 99, 'SYSTEM RELOAD FUNCTION' = 100, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 101, 'SYSTEM RELOAD' = 102, 'SYSTEM RESTART DISK' = 103, 'SYSTEM MERGES' = 104, 'SYSTEM TTL MERGES' = 105, 'SYSTEM FETCHES' = 106, 'SYSTEM MOVES' = 107, 'SYSTEM DISTRIBUTED SENDS' = 108, 'SYSTEM REPLICATED SENDS' = 109, 'SYSTEM SENDS' = 110, 'SYSTEM REPLICATION QUEUES' = 111, 'SYSTEM DROP REPLICA' = 112, 'SYSTEM SYNC REPLICA' = 113, 'SYSTEM RESTART REPLICA' = 114, 'SYSTEM RESTORE REPLICA' = 115, 'SYSTEM SYNC DATABASE REPLICA' = 116, 'SYSTEM SYNC TRANSACTION LOG' = 117, 'SYSTEM FLUSH DISTRIBUTED' = 118, 'SYSTEM FLUSH LOGS' = 119, 'SYSTEM FLUSH' = 120, 'SYSTEM THREAD FUZZER' = 121, 'SYSTEM UNFREEZE' = 122, 'SYSTEM' = 123, 'dictGet' = 124, 'addressToLine' = 125, 'addressToLineWithInlines' = 126, 'addressToSymbol' = 127, 'demangle' = 128, 
'INTROSPECTION' = 129, 'FILE' = 130, 'URL' = 131, 'REMOTE' = 132, 'MONGO' = 133, 'MEILISEARCH' = 134, 'MYSQL' = 135, 'POSTGRES' = 136, 'SQLITE' = 137, 'ODBC' = 138, 'JDBC' = 139, 'HDFS' = 140, 'S3' = 141, 'HIVE' = 142, 'SOURCES' = 143, 'CLUSTER' = 144, 'ALL' = 145, 'NONE' = 146), `database` Nullable(String), `table` Nullable(String), `column` Nullable(String), @@ -551,10 +551,10 @@ ENGINE = SystemPartsColumns COMMENT 'SYSTEM TABLE is built on the fly.' CREATE TABLE system.privileges ( - `privilege` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SELECT' = 5, 'INSERT' = 6, 'ALTER UPDATE' = 7, 'ALTER DELETE' = 8, 'ALTER ADD COLUMN' = 9, 'ALTER MODIFY COLUMN' = 10, 'ALTER DROP COLUMN' = 11, 'ALTER COMMENT COLUMN' = 12, 'ALTER CLEAR COLUMN' = 13, 'ALTER RENAME COLUMN' = 14, 'ALTER MATERIALIZE COLUMN' = 15, 'ALTER COLUMN' = 16, 'ALTER MODIFY COMMENT' = 17, 'ALTER ORDER BY' = 18, 'ALTER SAMPLE BY' = 19, 'ALTER ADD INDEX' = 20, 'ALTER DROP INDEX' = 21, 'ALTER MATERIALIZE INDEX' = 22, 'ALTER CLEAR INDEX' = 23, 'ALTER INDEX' = 24, 'ALTER ADD PROJECTION' = 25, 'ALTER DROP PROJECTION' = 26, 'ALTER MATERIALIZE PROJECTION' = 27, 'ALTER CLEAR PROJECTION' = 28, 'ALTER PROJECTION' = 29, 'ALTER ADD CONSTRAINT' = 30, 'ALTER DROP CONSTRAINT' = 31, 'ALTER CONSTRAINT' = 32, 'ALTER TTL' = 33, 'ALTER MATERIALIZE TTL' = 34, 'ALTER SETTINGS' = 35, 'ALTER MOVE PARTITION' = 36, 'ALTER FETCH PARTITION' = 37, 'ALTER FREEZE PARTITION' = 38, 'ALTER DATABASE SETTINGS' = 39, 'ALTER TABLE' = 40, 'ALTER DATABASE' = 41, 'ALTER VIEW REFRESH' = 42, 'ALTER VIEW MODIFY QUERY' = 43, 'ALTER VIEW' = 44, 'ALTER' = 45, 'CREATE DATABASE' = 46, 'CREATE TABLE' = 47, 'CREATE VIEW' = 48, 'CREATE DICTIONARY' = 49, 'CREATE TEMPORARY TABLE' = 50, 'CREATE FUNCTION' = 51, 'CREATE' = 52, 'DROP DATABASE' = 53, 'DROP TABLE' = 54, 'DROP VIEW' = 55, 'DROP DICTIONARY' = 56, 'DROP FUNCTION' = 57, 'DROP' = 58, 'TRUNCATE' = 59, 'OPTIMIZE' = 60, 'BACKUP' = 61, 'KILL 
QUERY' = 62, 'KILL TRANSACTION' = 63, 'MOVE PARTITION BETWEEN SHARDS' = 64, 'CREATE USER' = 65, 'ALTER USER' = 66, 'DROP USER' = 67, 'CREATE ROLE' = 68, 'ALTER ROLE' = 69, 'DROP ROLE' = 70, 'ROLE ADMIN' = 71, 'CREATE ROW POLICY' = 72, 'ALTER ROW POLICY' = 73, 'DROP ROW POLICY' = 74, 'CREATE QUOTA' = 75, 'ALTER QUOTA' = 76, 'DROP QUOTA' = 77, 'CREATE SETTINGS PROFILE' = 78, 'ALTER SETTINGS PROFILE' = 79, 'DROP SETTINGS PROFILE' = 80, 'SHOW USERS' = 81, 'SHOW ROLES' = 82, 'SHOW ROW POLICIES' = 83, 'SHOW QUOTAS' = 84, 'SHOW SETTINGS PROFILES' = 85, 'SHOW ACCESS' = 86, 'ACCESS MANAGEMENT' = 87, 'SYSTEM SHUTDOWN' = 88, 'SYSTEM DROP DNS CACHE' = 89, 'SYSTEM DROP MARK CACHE' = 90, 'SYSTEM DROP UNCOMPRESSED CACHE' = 91, 'SYSTEM DROP MMAP CACHE' = 92, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 93, 'SYSTEM DROP CACHE' = 94, 'SYSTEM RELOAD CONFIG' = 95, 'SYSTEM RELOAD SYMBOLS' = 96, 'SYSTEM RELOAD DICTIONARY' = 97, 'SYSTEM RELOAD MODEL' = 98, 'SYSTEM RELOAD FUNCTION' = 99, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 100, 'SYSTEM RELOAD' = 101, 'SYSTEM RESTART DISK' = 102, 'SYSTEM MERGES' = 103, 'SYSTEM TTL MERGES' = 104, 'SYSTEM FETCHES' = 105, 'SYSTEM MOVES' = 106, 'SYSTEM DISTRIBUTED SENDS' = 107, 'SYSTEM REPLICATED SENDS' = 108, 'SYSTEM SENDS' = 109, 'SYSTEM REPLICATION QUEUES' = 110, 'SYSTEM DROP REPLICA' = 111, 'SYSTEM SYNC REPLICA' = 112, 'SYSTEM RESTART REPLICA' = 113, 'SYSTEM RESTORE REPLICA' = 114, 'SYSTEM SYNC DATABASE REPLICA' = 115, 'SYSTEM SYNC TRANSACTION LOG' = 116, 'SYSTEM FLUSH DISTRIBUTED' = 117, 'SYSTEM FLUSH LOGS' = 118, 'SYSTEM FLUSH' = 119, 'SYSTEM THREAD FUZZER' = 120, 'SYSTEM UNFREEZE' = 121, 'SYSTEM' = 122, 'dictGet' = 123, 'addressToLine' = 124, 'addressToLineWithInlines' = 125, 'addressToSymbol' = 126, 'demangle' = 127, 'INTROSPECTION' = 128, 'FILE' = 129, 'URL' = 130, 'REMOTE' = 131, 'MONGO' = 132, 'MEILISEARCH' = 133, 'MYSQL' = 134, 'POSTGRES' = 135, 'SQLITE' = 136, 'ODBC' = 137, 'JDBC' = 138, 'HDFS' = 139, 'S3' = 140, 'HIVE' = 141, 'SOURCES' = 142, 
'CLUSTER' = 143, 'ALL' = 144, 'NONE' = 145), + `privilege` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER TABLE' = 41, 'ALTER DATABASE' = 42, 'ALTER VIEW REFRESH' = 43, 'ALTER VIEW MODIFY QUERY' = 44, 'ALTER VIEW' = 45, 'ALTER' = 46, 'CREATE DATABASE' = 47, 'CREATE TABLE' = 48, 'CREATE VIEW' = 49, 'CREATE DICTIONARY' = 50, 'CREATE TEMPORARY TABLE' = 51, 'CREATE FUNCTION' = 52, 'CREATE' = 53, 'DROP DATABASE' = 54, 'DROP TABLE' = 55, 'DROP VIEW' = 56, 'DROP DICTIONARY' = 57, 'DROP FUNCTION' = 58, 'DROP' = 59, 'TRUNCATE' = 60, 'OPTIMIZE' = 61, 'BACKUP' = 62, 'KILL QUERY' = 63, 'KILL TRANSACTION' = 64, 'MOVE PARTITION BETWEEN SHARDS' = 65, 'CREATE USER' = 66, 'ALTER USER' = 67, 'DROP USER' = 68, 'CREATE ROLE' = 69, 'ALTER ROLE' = 70, 'DROP ROLE' = 71, 'ROLE ADMIN' = 72, 'CREATE ROW POLICY' = 73, 'ALTER ROW POLICY' = 74, 'DROP ROW POLICY' = 75, 'CREATE QUOTA' = 76, 'ALTER QUOTA' = 77, 'DROP QUOTA' = 78, 'CREATE SETTINGS PROFILE' = 79, 'ALTER SETTINGS PROFILE' = 80, 'DROP SETTINGS PROFILE' 
= 81, 'SHOW USERS' = 82, 'SHOW ROLES' = 83, 'SHOW ROW POLICIES' = 84, 'SHOW QUOTAS' = 85, 'SHOW SETTINGS PROFILES' = 86, 'SHOW ACCESS' = 87, 'ACCESS MANAGEMENT' = 88, 'SYSTEM SHUTDOWN' = 89, 'SYSTEM DROP DNS CACHE' = 90, 'SYSTEM DROP MARK CACHE' = 91, 'SYSTEM DROP UNCOMPRESSED CACHE' = 92, 'SYSTEM DROP MMAP CACHE' = 93, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 94, 'SYSTEM DROP CACHE' = 95, 'SYSTEM RELOAD CONFIG' = 96, 'SYSTEM RELOAD SYMBOLS' = 97, 'SYSTEM RELOAD DICTIONARY' = 98, 'SYSTEM RELOAD MODEL' = 99, 'SYSTEM RELOAD FUNCTION' = 100, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 101, 'SYSTEM RELOAD' = 102, 'SYSTEM RESTART DISK' = 103, 'SYSTEM MERGES' = 104, 'SYSTEM TTL MERGES' = 105, 'SYSTEM FETCHES' = 106, 'SYSTEM MOVES' = 107, 'SYSTEM DISTRIBUTED SENDS' = 108, 'SYSTEM REPLICATED SENDS' = 109, 'SYSTEM SENDS' = 110, 'SYSTEM REPLICATION QUEUES' = 111, 'SYSTEM DROP REPLICA' = 112, 'SYSTEM SYNC REPLICA' = 113, 'SYSTEM RESTART REPLICA' = 114, 'SYSTEM RESTORE REPLICA' = 115, 'SYSTEM SYNC DATABASE REPLICA' = 116, 'SYSTEM SYNC TRANSACTION LOG' = 117, 'SYSTEM FLUSH DISTRIBUTED' = 118, 'SYSTEM FLUSH LOGS' = 119, 'SYSTEM FLUSH' = 120, 'SYSTEM THREAD FUZZER' = 121, 'SYSTEM UNFREEZE' = 122, 'SYSTEM' = 123, 'dictGet' = 124, 'addressToLine' = 125, 'addressToLineWithInlines' = 126, 'addressToSymbol' = 127, 'demangle' = 128, 'INTROSPECTION' = 129, 'FILE' = 130, 'URL' = 131, 'REMOTE' = 132, 'MONGO' = 133, 'MEILISEARCH' = 134, 'MYSQL' = 135, 'POSTGRES' = 136, 'SQLITE' = 137, 'ODBC' = 138, 'JDBC' = 139, 'HDFS' = 140, 'S3' = 141, 'HIVE' = 142, 'SOURCES' = 143, 'CLUSTER' = 144, 'ALL' = 145, 'NONE' = 146), `aliases` Array(String), `level` Nullable(Enum8('GLOBAL' = 0, 'DATABASE' = 1, 'TABLE' = 2, 'DICTIONARY' = 3, 'VIEW' = 4, 'COLUMN' = 5)), - `parent_group` Nullable(Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SELECT' = 5, 'INSERT' = 6, 'ALTER UPDATE' = 7, 'ALTER DELETE' = 8, 'ALTER ADD COLUMN' = 9, 'ALTER MODIFY COLUMN' = 10, 
'ALTER DROP COLUMN' = 11, 'ALTER COMMENT COLUMN' = 12, 'ALTER CLEAR COLUMN' = 13, 'ALTER RENAME COLUMN' = 14, 'ALTER MATERIALIZE COLUMN' = 15, 'ALTER COLUMN' = 16, 'ALTER MODIFY COMMENT' = 17, 'ALTER ORDER BY' = 18, 'ALTER SAMPLE BY' = 19, 'ALTER ADD INDEX' = 20, 'ALTER DROP INDEX' = 21, 'ALTER MATERIALIZE INDEX' = 22, 'ALTER CLEAR INDEX' = 23, 'ALTER INDEX' = 24, 'ALTER ADD PROJECTION' = 25, 'ALTER DROP PROJECTION' = 26, 'ALTER MATERIALIZE PROJECTION' = 27, 'ALTER CLEAR PROJECTION' = 28, 'ALTER PROJECTION' = 29, 'ALTER ADD CONSTRAINT' = 30, 'ALTER DROP CONSTRAINT' = 31, 'ALTER CONSTRAINT' = 32, 'ALTER TTL' = 33, 'ALTER MATERIALIZE TTL' = 34, 'ALTER SETTINGS' = 35, 'ALTER MOVE PARTITION' = 36, 'ALTER FETCH PARTITION' = 37, 'ALTER FREEZE PARTITION' = 38, 'ALTER DATABASE SETTINGS' = 39, 'ALTER TABLE' = 40, 'ALTER DATABASE' = 41, 'ALTER VIEW REFRESH' = 42, 'ALTER VIEW MODIFY QUERY' = 43, 'ALTER VIEW' = 44, 'ALTER' = 45, 'CREATE DATABASE' = 46, 'CREATE TABLE' = 47, 'CREATE VIEW' = 48, 'CREATE DICTIONARY' = 49, 'CREATE TEMPORARY TABLE' = 50, 'CREATE FUNCTION' = 51, 'CREATE' = 52, 'DROP DATABASE' = 53, 'DROP TABLE' = 54, 'DROP VIEW' = 55, 'DROP DICTIONARY' = 56, 'DROP FUNCTION' = 57, 'DROP' = 58, 'TRUNCATE' = 59, 'OPTIMIZE' = 60, 'BACKUP' = 61, 'KILL QUERY' = 62, 'KILL TRANSACTION' = 63, 'MOVE PARTITION BETWEEN SHARDS' = 64, 'CREATE USER' = 65, 'ALTER USER' = 66, 'DROP USER' = 67, 'CREATE ROLE' = 68, 'ALTER ROLE' = 69, 'DROP ROLE' = 70, 'ROLE ADMIN' = 71, 'CREATE ROW POLICY' = 72, 'ALTER ROW POLICY' = 73, 'DROP ROW POLICY' = 74, 'CREATE QUOTA' = 75, 'ALTER QUOTA' = 76, 'DROP QUOTA' = 77, 'CREATE SETTINGS PROFILE' = 78, 'ALTER SETTINGS PROFILE' = 79, 'DROP SETTINGS PROFILE' = 80, 'SHOW USERS' = 81, 'SHOW ROLES' = 82, 'SHOW ROW POLICIES' = 83, 'SHOW QUOTAS' = 84, 'SHOW SETTINGS PROFILES' = 85, 'SHOW ACCESS' = 86, 'ACCESS MANAGEMENT' = 87, 'SYSTEM SHUTDOWN' = 88, 'SYSTEM DROP DNS CACHE' = 89, 'SYSTEM DROP MARK CACHE' = 90, 'SYSTEM DROP UNCOMPRESSED CACHE' = 91, 'SYSTEM DROP 
MMAP CACHE' = 92, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 93, 'SYSTEM DROP CACHE' = 94, 'SYSTEM RELOAD CONFIG' = 95, 'SYSTEM RELOAD SYMBOLS' = 96, 'SYSTEM RELOAD DICTIONARY' = 97, 'SYSTEM RELOAD MODEL' = 98, 'SYSTEM RELOAD FUNCTION' = 99, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 100, 'SYSTEM RELOAD' = 101, 'SYSTEM RESTART DISK' = 102, 'SYSTEM MERGES' = 103, 'SYSTEM TTL MERGES' = 104, 'SYSTEM FETCHES' = 105, 'SYSTEM MOVES' = 106, 'SYSTEM DISTRIBUTED SENDS' = 107, 'SYSTEM REPLICATED SENDS' = 108, 'SYSTEM SENDS' = 109, 'SYSTEM REPLICATION QUEUES' = 110, 'SYSTEM DROP REPLICA' = 111, 'SYSTEM SYNC REPLICA' = 112, 'SYSTEM RESTART REPLICA' = 113, 'SYSTEM RESTORE REPLICA' = 114, 'SYSTEM SYNC DATABASE REPLICA' = 115, 'SYSTEM SYNC TRANSACTION LOG' = 116, 'SYSTEM FLUSH DISTRIBUTED' = 117, 'SYSTEM FLUSH LOGS' = 118, 'SYSTEM FLUSH' = 119, 'SYSTEM THREAD FUZZER' = 120, 'SYSTEM UNFREEZE' = 121, 'SYSTEM' = 122, 'dictGet' = 123, 'addressToLine' = 124, 'addressToLineWithInlines' = 125, 'addressToSymbol' = 126, 'demangle' = 127, 'INTROSPECTION' = 128, 'FILE' = 129, 'URL' = 130, 'REMOTE' = 131, 'MONGO' = 132, 'MEILISEARCH' = 133, 'MYSQL' = 134, 'POSTGRES' = 135, 'SQLITE' = 136, 'ODBC' = 137, 'JDBC' = 138, 'HDFS' = 139, 'S3' = 140, 'HIVE' = 141, 'SOURCES' = 142, 'CLUSTER' = 143, 'ALL' = 144, 'NONE' = 145)) + `parent_group` Nullable(Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 
27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER TABLE' = 41, 'ALTER DATABASE' = 42, 'ALTER VIEW REFRESH' = 43, 'ALTER VIEW MODIFY QUERY' = 44, 'ALTER VIEW' = 45, 'ALTER' = 46, 'CREATE DATABASE' = 47, 'CREATE TABLE' = 48, 'CREATE VIEW' = 49, 'CREATE DICTIONARY' = 50, 'CREATE TEMPORARY TABLE' = 51, 'CREATE FUNCTION' = 52, 'CREATE' = 53, 'DROP DATABASE' = 54, 'DROP TABLE' = 55, 'DROP VIEW' = 56, 'DROP DICTIONARY' = 57, 'DROP FUNCTION' = 58, 'DROP' = 59, 'TRUNCATE' = 60, 'OPTIMIZE' = 61, 'BACKUP' = 62, 'KILL QUERY' = 63, 'KILL TRANSACTION' = 64, 'MOVE PARTITION BETWEEN SHARDS' = 65, 'CREATE USER' = 66, 'ALTER USER' = 67, 'DROP USER' = 68, 'CREATE ROLE' = 69, 'ALTER ROLE' = 70, 'DROP ROLE' = 71, 'ROLE ADMIN' = 72, 'CREATE ROW POLICY' = 73, 'ALTER ROW POLICY' = 74, 'DROP ROW POLICY' = 75, 'CREATE QUOTA' = 76, 'ALTER QUOTA' = 77, 'DROP QUOTA' = 78, 'CREATE SETTINGS PROFILE' = 79, 'ALTER SETTINGS PROFILE' = 80, 'DROP SETTINGS PROFILE' = 81, 'SHOW USERS' = 82, 'SHOW ROLES' = 83, 'SHOW ROW POLICIES' = 84, 'SHOW QUOTAS' = 85, 'SHOW SETTINGS PROFILES' = 86, 'SHOW ACCESS' = 87, 'ACCESS MANAGEMENT' = 88, 'SYSTEM SHUTDOWN' = 89, 'SYSTEM DROP DNS CACHE' = 90, 'SYSTEM DROP MARK CACHE' = 91, 'SYSTEM DROP UNCOMPRESSED CACHE' = 92, 'SYSTEM DROP MMAP CACHE' = 93, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 94, 'SYSTEM DROP CACHE' = 95, 'SYSTEM RELOAD CONFIG' = 96, 'SYSTEM RELOAD SYMBOLS' = 97, 'SYSTEM RELOAD DICTIONARY' = 98, 'SYSTEM RELOAD MODEL' = 99, 'SYSTEM RELOAD FUNCTION' = 100, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 101, 'SYSTEM RELOAD' = 102, 'SYSTEM RESTART DISK' = 103, 'SYSTEM MERGES' = 104, 'SYSTEM TTL MERGES' = 105, 'SYSTEM FETCHES' = 106, 'SYSTEM 
MOVES' = 107, 'SYSTEM DISTRIBUTED SENDS' = 108, 'SYSTEM REPLICATED SENDS' = 109, 'SYSTEM SENDS' = 110, 'SYSTEM REPLICATION QUEUES' = 111, 'SYSTEM DROP REPLICA' = 112, 'SYSTEM SYNC REPLICA' = 113, 'SYSTEM RESTART REPLICA' = 114, 'SYSTEM RESTORE REPLICA' = 115, 'SYSTEM SYNC DATABASE REPLICA' = 116, 'SYSTEM SYNC TRANSACTION LOG' = 117, 'SYSTEM FLUSH DISTRIBUTED' = 118, 'SYSTEM FLUSH LOGS' = 119, 'SYSTEM FLUSH' = 120, 'SYSTEM THREAD FUZZER' = 121, 'SYSTEM UNFREEZE' = 122, 'SYSTEM' = 123, 'dictGet' = 124, 'addressToLine' = 125, 'addressToLineWithInlines' = 126, 'addressToSymbol' = 127, 'demangle' = 128, 'INTROSPECTION' = 129, 'FILE' = 130, 'URL' = 131, 'REMOTE' = 132, 'MONGO' = 133, 'MEILISEARCH' = 134, 'MYSQL' = 135, 'POSTGRES' = 136, 'SQLITE' = 137, 'ODBC' = 138, 'JDBC' = 139, 'HDFS' = 140, 'S3' = 141, 'HIVE' = 142, 'SOURCES' = 143, 'CLUSTER' = 144, 'ALL' = 145, 'NONE' = 146)) ) ENGINE = SystemPrivileges COMMENT 'SYSTEM TABLE is built on the fly.' diff --git a/tests/queries/0_stateless/02344_describe_cache.sql b/tests/queries/0_stateless/02344_describe_cache.sql index aa6a409cd49..1c8a105d8e8 100644 --- a/tests/queries/0_stateless/02344_describe_cache.sql +++ b/tests/queries/0_stateless/02344_describe_cache.sql @@ -1 +1,3 @@ +-- tags: no-fasttest + DESCRIBE CACHE 's3disk'; From dd8203038f249080bbcf4e937e81a9c651c882d1 Mon Sep 17 00:00:00 2001 From: zvonand Date: Fri, 24 Jun 2022 00:36:57 +0500 Subject: [PATCH 043/101] updated exception handling --- src/Functions/FunctionBase58Conversion.h | 26 ++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/src/Functions/FunctionBase58Conversion.h b/src/Functions/FunctionBase58Conversion.h index ed4667aa63b..6979ce849f4 100644 --- a/src/Functions/FunctionBase58Conversion.h +++ b/src/Functions/FunctionBase58Conversion.h @@ -61,7 +61,18 @@ struct Base58Encode /// This way we do exponential resizes and one final resize after whole operation is complete encoded.clear(); if (srclen) - 
encoder.encode(encoded, source, srclen); + try + { + encoder.encode(encoded, source, srclen); + } + catch (const std::invalid_argument& e) + { + throw Exception(e.what(), ErrorCodes::BAD_ARGUMENTS); + } + catch (const std::domain_error& e) + { + throw Exception(e.what(), ErrorCodes::BAD_ARGUMENTS); + } size_t outlen = encoded.size(); if (processed_size + outlen >= current_allocated_size) @@ -126,7 +137,18 @@ struct Base58Decode /// This way we do exponential resizes and one final resize after whole operation is complete decoded.clear(); if (srclen) - decoder.decode(decoded, source, srclen); + try + { + decoder.decode(decoded, source, srclen); + } + catch (const std::invalid_argument& e) + { + throw Exception(e.what(), ErrorCodes::BAD_ARGUMENTS); + } + catch (const std::domain_error& e) + { + throw Exception(e.what(), ErrorCodes::BAD_ARGUMENTS); + } size_t outlen = decoded.size(); if (processed_size + outlen >= current_allocated_size) From bc9b56096ff2087f94aa76878805ef9a284d8b66 Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 23 Jun 2022 21:52:57 +0200 Subject: [PATCH 044/101] Fix --- .../MaterializedPostgreSQLConsumer.cpp | 21 ++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp b/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp index 61a40c374eb..a57328fb402 100644 --- a/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp +++ b/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp @@ -271,8 +271,24 @@ void MaterializedPostgreSQLConsumer::readTupleData( } }; + std::exception_ptr error; for (int column_idx = 0; column_idx < num_columns; ++column_idx) - proccess_column_value(readInt8(message, pos, size), column_idx); + { + try + { + proccess_column_value(readInt8(message, pos, size), column_idx); + } + catch (...) + { + insertDefaultValue(buffer, column_idx); + /// Let's collect only the first exception. 
+ /// This delaying of error throw is needed because + /// some errors can be ignored and just logged, + /// but in this case we need to finish insertion to all columns. + if (!error) + error = std::current_exception(); + } + } switch (type) { @@ -303,6 +319,9 @@ void MaterializedPostgreSQLConsumer::readTupleData( break; } } + + if (error) + std::rethrow_exception(error); } From ac4ad66985db6a01b6c6476c63c586af25ea1718 Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Fri, 24 Jun 2022 00:05:07 +0200 Subject: [PATCH 045/101] Update 02344_describe_cache.sql --- tests/queries/0_stateless/02344_describe_cache.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02344_describe_cache.sql b/tests/queries/0_stateless/02344_describe_cache.sql index 1c8a105d8e8..cb5a6bcc00d 100644 --- a/tests/queries/0_stateless/02344_describe_cache.sql +++ b/tests/queries/0_stateless/02344_describe_cache.sql @@ -1,3 +1,3 @@ -- tags: no-fasttest -DESCRIBE CACHE 's3disk'; +DESCRIBE CACHE 's3_cache'; From dc73042d622b00f25b838f793d8001ecc516f1dc Mon Sep 17 00:00:00 2001 From: kssenii Date: Fri, 24 Jun 2022 01:05:33 +0200 Subject: [PATCH 046/101] Better error messafe --- programs/odbc-bridge/ODBCPooledConnectionFactory.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/programs/odbc-bridge/ODBCPooledConnectionFactory.h b/programs/odbc-bridge/ODBCPooledConnectionFactory.h index 4d8d3f50ab9..f6185bffd1d 100644 --- a/programs/odbc-bridge/ODBCPooledConnectionFactory.h +++ b/programs/odbc-bridge/ODBCPooledConnectionFactory.h @@ -91,7 +91,11 @@ T execute(nanodbc::ConnectionHolderPtr connection_holder, std::function Date: Fri, 24 Jun 2022 02:44:22 +0200 Subject: [PATCH 047/101] Fix --- .../PostgreSQL/fetchPostgreSQLTableStructure.cpp | 12 +++++++----- tests/integration/test_storage_postgresql/test.py | 15 +++++++++++++++ 2 files changed, 22 insertions(+), 5 deletions(-) diff --git 
a/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.cpp b/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.cpp index 9f136efa1ff..08a7e78d0e9 100644 --- a/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.cpp +++ b/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.cpp @@ -263,10 +263,11 @@ PostgreSQLTableStructure fetchPostgreSQLTableStructure( "WHERE attrelid = (SELECT oid FROM pg_class WHERE {}) " "AND NOT attisdropped AND attnum > 0", where); - table.physical_columns = readNamesAndTypesList(tx, postgres_table, query, use_nulls, false); + auto postgres_table_with_schema = postgres_schema.empty() ? postgres_table : doubleQuoteString(postgres_schema) + '.' + doubleQuoteString(postgres_table); + table.physical_columns = readNamesAndTypesList(tx, postgres_table_with_schema, query, use_nulls, false); if (!table.physical_columns) - throw Exception(ErrorCodes::UNKNOWN_TABLE, "PostgreSQL table {} does not exist", postgres_table); + throw Exception(ErrorCodes::UNKNOWN_TABLE, "PostgreSQL table {} does not exist", postgres_table_with_schema); if (with_primary_key) { @@ -278,7 +279,7 @@ PostgreSQLTableStructure fetchPostgreSQLTableStructure( "AND a.attnum = ANY(i.indkey) " "WHERE attrelid = (SELECT oid FROM pg_class WHERE {}) AND i.indisprimary", where); - table.primary_key_columns = readNamesAndTypesList(tx, postgres_table, query, use_nulls, true); + table.primary_key_columns = readNamesAndTypesList(tx, postgres_table_with_schema, query, use_nulls, true); } if (with_replica_identity_index && !table.primary_key_columns) @@ -299,11 +300,12 @@ PostgreSQLTableStructure fetchPostgreSQLTableStructure( "and a.attnum = ANY(ix.indkey) " "and t.relkind in ('r', 'p') " /// simple tables "and t.relname = {} " /// Connection is already done to a needed database, only table name is needed. 
+ "and t.relnamespace = {} " "and ix.indisreplident = 't' " /// index is is replica identity index "ORDER BY a.attname", /// column names - quoteString(postgres_table)); + quoteString(postgres_table), quoteString(postgres_schema.empty() ? "public" : postgres_schema)); - table.replica_identity_columns = readNamesAndTypesList(tx, postgres_table, query, use_nulls, true); + table.replica_identity_columns = readNamesAndTypesList(tx, postgres_table_with_schema, query, use_nulls, true); } return table; diff --git a/tests/integration/test_storage_postgresql/test.py b/tests/integration/test_storage_postgresql/test.py index 8366ca5dc25..1fc0475419c 100644 --- a/tests/integration/test_storage_postgresql/test.py +++ b/tests/integration/test_storage_postgresql/test.py @@ -433,6 +433,7 @@ def test_datetime_with_timezone(started_cluster): def test_postgres_ndim(started_cluster): cursor = started_cluster.postgres_conn.cursor() + cursor.execute("DROP TABLE IF EXISTS arr1, arr2") cursor.execute("CREATE TABLE arr1 (a Integer[])") @@ -452,6 +453,20 @@ def test_postgres_ndim(started_cluster): assert result.strip() == "Array(Array(Nullable(Int32)))" cursor.execute("DROP TABLE arr1, arr2") + cursor.execute("DROP SCHEMA IF EXISTS ndim_schema CASCADE") + cursor.execute("CREATE SCHEMA ndim_schema") + cursor.execute("CREATE TABLE ndim_schema.arr1 (a integer[])") + cursor.execute("INSERT INTO ndim_schema.arr1 SELECT '{{1}, {2}}'") + # The point is in creating a table via 'as select *', in postgres att_ndim will not be correct in this case. 
+ cursor.execute("CREATE TABLE ndim_schema.arr2 AS SELECT * FROM ndim_schema.arr1") + result = node1.query( + """SELECT toTypeName(a) FROM postgresql(postgres1, schema='ndim_schema', table='arr2')""" + ) + assert result.strip() == "Array(Array(Nullable(Int32)))" + + cursor.execute("DROP TABLE ndim_schema.arr1, ndim_schema.arr2") + cursor.execute("DROP SCHEMA ndim_schema CASCADE") + def test_postgres_on_conflict(started_cluster): cursor = started_cluster.postgres_conn.cursor() From 0b1ce49b6fff2847b38e39e5886d82e63b37907a Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Fri, 24 Jun 2022 03:02:13 +0200 Subject: [PATCH 048/101] Update 02344_describe_cache.sql --- tests/queries/0_stateless/02344_describe_cache.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02344_describe_cache.sql b/tests/queries/0_stateless/02344_describe_cache.sql index cb5a6bcc00d..494470053bd 100644 --- a/tests/queries/0_stateless/02344_describe_cache.sql +++ b/tests/queries/0_stateless/02344_describe_cache.sql @@ -1,3 +1,3 @@ --- tags: no-fasttest +-- Tags: no-fasttest DESCRIBE CACHE 's3_cache'; From aea3091d46af0b315bba958b89eef8ad7ba0a98f Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 24 Jun 2022 03:18:51 +0200 Subject: [PATCH 049/101] Simplify hardware benchmark --- benchmark/hardware.sh | 115 ++++++++---------------------------------- 1 file changed, 22 insertions(+), 93 deletions(-) diff --git a/benchmark/hardware.sh b/benchmark/hardware.sh index f6206d0257c..323f427a4c9 100755 --- a/benchmark/hardware.sh +++ b/benchmark/hardware.sh @@ -1,102 +1,21 @@ #!/bin/bash -e -if [[ -n $1 ]]; then - SCALE=$1 -else - SCALE=100 -fi - -TABLE="hits_${SCALE}m_obfuscated" -DATASET="${TABLE}_v1.tar.xz" +TABLE="hits_100m_obfuscated" QUERIES_FILE="queries.sql" TRIES=3 -# Note: on older Ubuntu versions, 'axel' does not support IPv6. 
If you are using IPv6-only servers on very old Ubuntu, just don't install 'axel'. +mkdir -p clickhouse-benchmark +pushd clickhouse-benchmark -FASTER_DOWNLOAD=wget -if command -v axel >/dev/null; then - FASTER_DOWNLOAD=axel -else - echo "It's recommended to install 'axel' for faster downloads." +# Download the binary +if [[ ! -x clickhouse ]]; then + curl https://clickhouse.com/ | sh fi -if command -v pixz >/dev/null; then - TAR_PARAMS='-Ipixz' -else - echo "It's recommended to install 'pixz' for faster decompression of the dataset." -fi - -mkdir -p clickhouse-benchmark-$SCALE -pushd clickhouse-benchmark-$SCALE - -OS=$(uname -s) -ARCH=$(uname -m) - -DIR= - -if [ "${OS}" = "Linux" ] -then - if [ "${ARCH}" = "x86_64" ] - then - DIR="amd64" - elif [ "${ARCH}" = "aarch64" ] - then - DIR="aarch64" - elif [ "${ARCH}" = "powerpc64le" ] - then - DIR="powerpc64le" - fi -elif [ "${OS}" = "FreeBSD" ] -then - if [ "${ARCH}" = "x86_64" ] - then - DIR="freebsd" - elif [ "${ARCH}" = "aarch64" ] - then - DIR="freebsd-aarch64" - elif [ "${ARCH}" = "powerpc64le" ] - then - DIR="freebsd-powerpc64le" - fi -elif [ "${OS}" = "Darwin" ] -then - if [ "${ARCH}" = "x86_64" ] - then - DIR="macos" - elif [ "${ARCH}" = "aarch64" -o "${ARCH}" = "arm64" ] - then - DIR="macos-aarch64" - fi -fi - -if [ -z "${DIR}" ] -then - echo "The '${OS}' operating system with the '${ARCH}' architecture is not supported." - exit 1 -fi - -URL="https://builds.clickhouse.com/master/${DIR}/clickhouse" -echo -echo "Will download ${URL}" -echo -curl -O "${URL}" && chmod a+x clickhouse || exit 1 -echo -echo "Successfully downloaded the ClickHouse binary" - -chmod a+x clickhouse - if [[ ! -f $QUERIES_FILE ]]; then wget "https://raw.githubusercontent.com/ClickHouse/ClickHouse/master/benchmark/clickhouse/$QUERIES_FILE" fi -if [[ ! -d data ]]; then - if [[ ! -f $DATASET ]]; then - $FASTER_DOWNLOAD "https://datasets.clickhouse.com/hits/partitions/$DATASET" - fi - - tar $TAR_PARAMS --strip-components=1 --directory=. 
-x -v -f $DATASET -fi - uptime echo "Starting clickhouse-server" @@ -114,10 +33,20 @@ echo "Waiting for clickhouse-server to start" for i in {1..30}; do sleep 1 - ./clickhouse client --query "SELECT 'The dataset size is: ', count() FROM $TABLE" 2>/dev/null && break || echo '.' + ./clickhouse client --query "SELECT 'Ok.'" 2>/dev/null && break || echo -n '.' if [[ $i == 30 ]]; then exit 1; fi done +echo "Will download the dataset" +./clickhouse client --progress --query " + CREATE TABLE ${TABLE} ENGINE = MergeTree PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID), EventTime) + AS SELECT * FROM s3('https://clickhouse-public-datasets.s3.amazonaws.com/hits/native/hits_100m_obfuscated_*.native.zst')" + +./clickhouse client --query "SELECT 'The dataset size is: ', count() FROM ${TABLE}" + +echo "Will prepare the dataset" +./clickhouse client --query "OPTIMIZE TABLE ${TABLE} FINAL" + echo echo "Will perform benchmark. Results:" echo @@ -133,7 +62,7 @@ cat "$QUERIES_FILE" | sed "s/{table}/${TABLE}/g" | while read query; do echo -n "[" for i in $(seq 1 $TRIES); do - RES=$(./clickhouse client --max_memory_usage 100G --time --format=Null --query="$query" 2>&1 ||:) + RES=$(./clickhouse client --time --format=Null --query="$query" 2>&1 ||:) [[ "$?" 
== "0" ]] && echo -n "${RES}" || echo -n "null" [[ "$i" != $TRIES ]] && echo -n ", " done @@ -180,10 +109,10 @@ else cat /proc/meminfo | grep MemTotal echo '----RAID Info-------------------' cat /proc/mdstat - #echo '----PCI-------------------------' - #lspci - #echo '----All Hardware Info-----------' - #lshw echo '--------------------------------' fi echo + +echo "Instance type from IMDS (if available):" +curl --connect-timeout 1 http://169.254.169.254/latest/meta-data/instance-type +echo From 6df0aa8810dee91eabf58b3deb837ccb078f432a Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 24 Jun 2022 03:19:49 +0200 Subject: [PATCH 050/101] Simplify hardware benchmark --- benchmark/hardware.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmark/hardware.sh b/benchmark/hardware.sh index 323f427a4c9..f7e9e4e6831 100755 --- a/benchmark/hardware.sh +++ b/benchmark/hardware.sh @@ -40,7 +40,7 @@ done echo "Will download the dataset" ./clickhouse client --progress --query " CREATE TABLE ${TABLE} ENGINE = MergeTree PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID), EventTime) - AS SELECT * FROM s3('https://clickhouse-public-datasets.s3.amazonaws.com/hits/native/hits_100m_obfuscated_*.native.zst')" + AS SELECT * FROM s3('https://datasets.clickhouse.com/hits/native/hits_100m_obfuscated_*.native.zst')" ./clickhouse client --query "SELECT 'The dataset size is: ', count() FROM ${TABLE}" From 5cd655a6ea51b5ec3860dcd0b8b0b4b8921862c4 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 24 Jun 2022 03:22:45 +0200 Subject: [PATCH 051/101] Simplify hardware benchmark --- benchmark/hardware.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmark/hardware.sh b/benchmark/hardware.sh index f7e9e4e6831..05753f36233 100755 --- a/benchmark/hardware.sh +++ b/benchmark/hardware.sh @@ -38,7 +38,7 @@ for i in {1..30}; do done echo "Will download the dataset" -./clickhouse client --progress --query " 
+./clickhouse client --max_insert_threads $(nproc || 4) --progress --query " CREATE TABLE ${TABLE} ENGINE = MergeTree PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID), EventTime) AS SELECT * FROM s3('https://datasets.clickhouse.com/hits/native/hits_100m_obfuscated_*.native.zst')" From 6cb1d60883b6baa5e7ef72813b25c7eec359b697 Mon Sep 17 00:00:00 2001 From: kssenii Date: Fri, 24 Jun 2022 03:24:54 +0200 Subject: [PATCH 052/101] Fix --- src/Databases/DatabaseFactory.cpp | 8 +++++++- .../test_postgresql_database_engine/test.py | 14 ++++++++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/src/Databases/DatabaseFactory.cpp b/src/Databases/DatabaseFactory.cpp index 5cc334eaad4..82a7dff7125 100644 --- a/src/Databases/DatabaseFactory.cpp +++ b/src/Databases/DatabaseFactory.cpp @@ -335,7 +335,13 @@ DatabasePtr DatabaseFactory::getImpl(const ASTCreateQuery & create, const String configuration.password = safeGetLiteralValue(engine_args[3], engine_name); if (engine_args.size() >= 5) - configuration.schema = safeGetLiteralValue(engine_args[4], engine_name); + { + auto arg_value = engine_args[4]->as()->value; + if (arg_value.getType() == Field::Types::Which::String) + configuration.schema = safeGetLiteralValue(engine_args[4], engine_name); + else + use_table_cache = safeGetLiteralValue(engine_args[4], engine_name); + } } if (engine_args.size() >= 6) diff --git a/tests/integration/test_postgresql_database_engine/test.py b/tests/integration/test_postgresql_database_engine/test.py index dd5b3a09ca5..aabf3507d8f 100644 --- a/tests/integration/test_postgresql_database_engine/test.py +++ b/tests/integration/test_postgresql_database_engine/test.py @@ -320,6 +320,20 @@ def test_predefined_connection_configuration(started_cluster): cursor.execute("DROP SCHEMA IF EXISTS test_schema CASCADE") +def test_postgres_database_old_syntax(started_cluster): + conn = get_postgres_conn(started_cluster, True) + cursor = conn.cursor() + + node1.query( 
+ """ + DROP DATABASE IF EXISTS test_database; + CREATE DATABASE test_database ENGINE = PostgreSQL('postgres1:5432', 'test_database', 'postgres', 'mysecretpassword', 1); + """ + ) + create_postgres_table(cursor, "test_table") + assert "test_table" in node1.query("SHOW TABLES FROM test_database") + + if __name__ == "__main__": cluster.start() input("Cluster created, press any key to destroy...") From e902a824f598e73d83d92dc9032bafa5cc7b7854 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 24 Jun 2022 03:25:28 +0200 Subject: [PATCH 053/101] Simplify hardware benchmark --- benchmark/hardware.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmark/hardware.sh b/benchmark/hardware.sh index 05753f36233..f1a88dd3cf5 100755 --- a/benchmark/hardware.sh +++ b/benchmark/hardware.sh @@ -40,7 +40,7 @@ done echo "Will download the dataset" ./clickhouse client --max_insert_threads $(nproc || 4) --progress --query " CREATE TABLE ${TABLE} ENGINE = MergeTree PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID), EventTime) - AS SELECT * FROM s3('https://datasets.clickhouse.com/hits/native/hits_100m_obfuscated_*.native.zst')" + AS SELECT * FROM url('https://datasets.clickhouse.com/hits/native/hits_100m_obfuscated_*.native.zst')" ./clickhouse client --query "SELECT 'The dataset size is: ', count() FROM ${TABLE}" From e21f2a0efd51b66342286eb73064bb183d847bc0 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 24 Jun 2022 03:26:44 +0200 Subject: [PATCH 054/101] Simplify hardware benchmark --- benchmark/hardware.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmark/hardware.sh b/benchmark/hardware.sh index f1a88dd3cf5..e146394078e 100755 --- a/benchmark/hardware.sh +++ b/benchmark/hardware.sh @@ -40,7 +40,7 @@ done echo "Will download the dataset" ./clickhouse client --max_insert_threads $(nproc || 4) --progress --query " CREATE TABLE ${TABLE} ENGINE = MergeTree PARTITION BY toYYYYMM(EventDate) 
ORDER BY (CounterID, EventDate, intHash32(UserID), EventTime) - AS SELECT * FROM url('https://datasets.clickhouse.com/hits/native/hits_100m_obfuscated_*.native.zst')" + AS SELECT * FROM url('https://datasets.clickhouse.com/hits/native/hits_100m_obfuscated_{0..255}.native.zst')" ./clickhouse client --query "SELECT 'The dataset size is: ', count() FROM ${TABLE}" From 7e4038ae526ed2c3f9be3b6e51cc65153efa69fd Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 24 Jun 2022 03:27:39 +0200 Subject: [PATCH 055/101] Simplify hardware benchmark --- benchmark/hardware.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmark/hardware.sh b/benchmark/hardware.sh index e146394078e..a4cafd501e2 100755 --- a/benchmark/hardware.sh +++ b/benchmark/hardware.sh @@ -39,7 +39,7 @@ done echo "Will download the dataset" ./clickhouse client --max_insert_threads $(nproc || 4) --progress --query " - CREATE TABLE ${TABLE} ENGINE = MergeTree PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID), EventTime) + CREATE OR REPLACE TABLE ${TABLE} ENGINE = MergeTree PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID), EventTime) AS SELECT * FROM url('https://datasets.clickhouse.com/hits/native/hits_100m_obfuscated_{0..255}.native.zst')" ./clickhouse client --query "SELECT 'The dataset size is: ', count() FROM ${TABLE}" From 2c828338f409d88f860fe4236045a3cab0956aad Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Fri, 17 Jun 2022 12:15:19 +0200 Subject: [PATCH 056/101] Replace hyperscan by vectorscan This commit migrates ClickHouse to Vectorscan. The first 10 min of [0] explain the reasons for it. (*) Addresses (but does not resolve) #38046 (*) Config parameter names (e.g. "max_hyperscan_regexp_length") are preserved for compatibility. Likewise, error codes (e.g. "ErrorCodes::HYPERSCAN_CANNOT_SCAN_TEXT") and function/class names (e.g. 
"HyperscanDeleter") are preserved as vectorscan aims to be a drop-in replacement. [0] https://www.youtube.com/watch?v=KlZWmmflW6M --- .gitmodules | 6 +- contrib/CMakeLists.txt | 2 +- contrib/hyperscan | 1 - contrib/vectorscan | 1 + .../CMakeLists.txt | 180 +- contrib/vectorscan-cmake/aarch64/config.h | 142 + .../common/hs_version.h | 5 +- .../vectorscan-cmake/rageled_files/Parser.cpp | 5605 +++++++++++++++++ .../rageled_files/control_verbs.cpp | 443 ++ .../x86_64/config.h | 38 +- docs/en/development/contrib.md | 2 +- src/Functions/CMakeLists.txt | 4 +- src/Functions/MultiMatchAllIndicesImpl.h | 10 +- src/Functions/MultiMatchAnyImpl.h | 16 +- src/Functions/Regexps.h | 6 +- src/Functions/URL/CMakeLists.txt | 4 +- src/Functions/config_functions.h.in | 2 +- src/Functions/configure_config.cmake | 4 +- ...StorageSystemBuildOptions.generated.cpp.in | 2 +- .../queries/0_stateless/00926_multimatch.sql | 2 +- .../00929_multi_match_edit_distance.sql | 2 +- .../02004_max_hyperscan_regex_length.sql | 2 +- .../1_stateful/00095_hyperscan_profiler.sql | 2 +- .../aspell-ignore/en/aspell-dict.txt | 1 + 24 files changed, 6383 insertions(+), 99 deletions(-) delete mode 160000 contrib/hyperscan create mode 160000 contrib/vectorscan rename contrib/{hyperscan-cmake => vectorscan-cmake}/CMakeLists.txt (73%) create mode 100644 contrib/vectorscan-cmake/aarch64/config.h rename contrib/{hyperscan-cmake => vectorscan-cmake}/common/hs_version.h (94%) create mode 100644 contrib/vectorscan-cmake/rageled_files/Parser.cpp create mode 100644 contrib/vectorscan-cmake/rageled_files/control_verbs.cpp rename contrib/{hyperscan-cmake => vectorscan-cmake}/x86_64/config.h (73%) diff --git a/.gitmodules b/.gitmodules index b102267c7aa..16099c63368 100644 --- a/.gitmodules +++ b/.gitmodules @@ -86,9 +86,6 @@ [submodule "contrib/h3"] path = contrib/h3 url = https://github.com/ClickHouse/h3 -[submodule "contrib/hyperscan"] - path = contrib/hyperscan - url = https://github.com/ClickHouse/hyperscan.git 
[submodule "contrib/libunwind"] path = contrib/libunwind url = https://github.com/ClickHouse/libunwind.git @@ -268,6 +265,9 @@ [submodule "contrib/hashidsxx"] path = contrib/hashidsxx url = https://github.com/schoentoon/hashidsxx.git +[submodule "contrib/vectorscan"] + path = contrib/vectorscan + url = https://github.com/VectorCamp/vectorscan.git [submodule "contrib/liburing"] path = contrib/liburing url = https://github.com/axboe/liburing.git diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index c2ffd0131da..1bb53669cfa 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -58,7 +58,7 @@ add_contrib (boost-cmake boost) add_contrib (cctz-cmake cctz) add_contrib (consistent-hashing) add_contrib (dragonbox-cmake dragonbox) -add_contrib (hyperscan-cmake hyperscan) +add_contrib (vectorscan-cmake vectorscan) add_contrib (jemalloc-cmake jemalloc) add_contrib (libcpuid-cmake libcpuid) add_contrib (libdivide) diff --git a/contrib/hyperscan b/contrib/hyperscan deleted file mode 160000 index 5edc68c5ac6..00000000000 --- a/contrib/hyperscan +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 5edc68c5ac68d2d4f876159e9ee84def6d3dc87c diff --git a/contrib/vectorscan b/contrib/vectorscan new file mode 160000 index 00000000000..73695e419c2 --- /dev/null +++ b/contrib/vectorscan @@ -0,0 +1 @@ +Subproject commit 73695e419c27af7fe2a099c7aa57931cc02aea5d diff --git a/contrib/hyperscan-cmake/CMakeLists.txt b/contrib/vectorscan-cmake/CMakeLists.txt similarity index 73% rename from contrib/hyperscan-cmake/CMakeLists.txt rename to contrib/vectorscan-cmake/CMakeLists.txt index 02c823a3a42..140c174cd73 100644 --- a/contrib/hyperscan-cmake/CMakeLists.txt +++ b/contrib/vectorscan-cmake/CMakeLists.txt @@ -1,54 +1,65 @@ -if (HAVE_SSSE3) - option (ENABLE_HYPERSCAN "Enable hyperscan library" ${ENABLE_LIBRARIES}) -elseif(ENABLE_HYPERSCAN) - message (${RECONFIGURE_MESSAGE_LEVEL} "Can't use hyperscan without SSSE3") - set (ENABLE_HYPERSCAN OFF) -endif () +# We use vectorscan, a 
portable and API/ABI-compatible drop-in replacement for hyperscan. -if (NOT ENABLE_HYPERSCAN) - message (STATUS "Not using hyperscan") +if (ARCH_AMD64 OR ARCH_AARCH64) + option (ENABLE_VECTORSCAN "Enable vectorscan library" ${ENABLE_LIBRARIES}) +endif() + +# TODO PPC should generally work but needs manual generation of ppc/config.h file on a PPC machine + +if (NOT ENABLE_VECTORSCAN) + message (STATUS "Not using vectorscan") return() endif() -set (LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/hyperscan") +set (LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/vectorscan") +# Gobble up all c/cpp files in vectorscan/src/, omit *dump*.c/cpp files as we don't use the dump feature (see x86/config.h) set (SRCS "${LIBRARY_DIR}/src/alloc.c" + "${LIBRARY_DIR}/src/crc32.c" + "${LIBRARY_DIR}/src/database.c" + # "${LIBRARY_DIR}/src/dispatcher.c" # the linker's wrath be upon those who include dispatcher.c. + "${LIBRARY_DIR}/src/grey.cpp" + "${LIBRARY_DIR}/src/hs.cpp" + "${LIBRARY_DIR}/src/hs_valid_platform.c" + "${LIBRARY_DIR}/src/hs_version.c" + "${LIBRARY_DIR}/src/runtime.c" + "${LIBRARY_DIR}/src/scratch.c" + "${LIBRARY_DIR}/src/stream_compress.c" + "${LIBRARY_DIR}/src/compiler/asserts.cpp" "${LIBRARY_DIR}/src/compiler/compiler.cpp" "${LIBRARY_DIR}/src/compiler/error.cpp" - "${LIBRARY_DIR}/src/crc32.c" - "${LIBRARY_DIR}/src/database.c" + "${LIBRARY_DIR}/src/fdr/engine_description.cpp" - "${LIBRARY_DIR}/src/fdr/fdr_compile_util.cpp" + "${LIBRARY_DIR}/src/fdr/fdr.c" "${LIBRARY_DIR}/src/fdr/fdr_compile.cpp" + "${LIBRARY_DIR}/src/fdr/fdr_compile_util.cpp" "${LIBRARY_DIR}/src/fdr/fdr_confirm_compile.cpp" "${LIBRARY_DIR}/src/fdr/fdr_engine_description.cpp" - "${LIBRARY_DIR}/src/fdr/fdr.c" "${LIBRARY_DIR}/src/fdr/flood_compile.cpp" + "${LIBRARY_DIR}/src/fdr/teddy.c" + "${LIBRARY_DIR}/src/fdr/teddy_avx2.c" "${LIBRARY_DIR}/src/fdr/teddy_compile.cpp" "${LIBRARY_DIR}/src/fdr/teddy_engine_description.cpp" - "${LIBRARY_DIR}/src/fdr/teddy.c" - "${LIBRARY_DIR}/src/grey.cpp" - 
"${LIBRARY_DIR}/src/hs_valid_platform.c" - "${LIBRARY_DIR}/src/hs_version.c" - "${LIBRARY_DIR}/src/hs.cpp" + + "${LIBRARY_DIR}/src/hwlm/hwlm.c" "${LIBRARY_DIR}/src/hwlm/hwlm_build.cpp" "${LIBRARY_DIR}/src/hwlm/hwlm_literal.cpp" - "${LIBRARY_DIR}/src/hwlm/hwlm.c" "${LIBRARY_DIR}/src/hwlm/noodle_build.cpp" - "${LIBRARY_DIR}/src/hwlm/noodle_engine.c" - "${LIBRARY_DIR}/src/nfa/accel_dfa_build_strat.cpp" + "${LIBRARY_DIR}/src/hwlm/noodle_engine.cpp" + "${LIBRARY_DIR}/src/nfa/accel.c" + "${LIBRARY_DIR}/src/nfa/accel_dfa_build_strat.cpp" "${LIBRARY_DIR}/src/nfa/accelcompile.cpp" "${LIBRARY_DIR}/src/nfa/castle.c" "${LIBRARY_DIR}/src/nfa/castlecompile.cpp" "${LIBRARY_DIR}/src/nfa/dfa_build_strat.cpp" "${LIBRARY_DIR}/src/nfa/dfa_min.cpp" "${LIBRARY_DIR}/src/nfa/gough.c" + "${LIBRARY_DIR}/src/nfa/goughcompile.cpp" "${LIBRARY_DIR}/src/nfa/goughcompile_accel.cpp" "${LIBRARY_DIR}/src/nfa/goughcompile_reg.cpp" - "${LIBRARY_DIR}/src/nfa/goughcompile.cpp" "${LIBRARY_DIR}/src/nfa/lbr.c" "${LIBRARY_DIR}/src/nfa/limex_64.c" "${LIBRARY_DIR}/src/nfa/limex_accel.c" @@ -59,28 +70,32 @@ set (SRCS "${LIBRARY_DIR}/src/nfa/limex_simd384.c" "${LIBRARY_DIR}/src/nfa/limex_simd512.c" "${LIBRARY_DIR}/src/nfa/mcclellan.c" - "${LIBRARY_DIR}/src/nfa/mcclellancompile_util.cpp" "${LIBRARY_DIR}/src/nfa/mcclellancompile.cpp" + "${LIBRARY_DIR}/src/nfa/mcclellancompile_util.cpp" + "${LIBRARY_DIR}/src/nfa/mcsheng.c" "${LIBRARY_DIR}/src/nfa/mcsheng_compile.cpp" "${LIBRARY_DIR}/src/nfa/mcsheng_data.c" - "${LIBRARY_DIR}/src/nfa/mcsheng.c" "${LIBRARY_DIR}/src/nfa/mpv.c" "${LIBRARY_DIR}/src/nfa/mpvcompile.cpp" "${LIBRARY_DIR}/src/nfa/nfa_api_dispatch.c" "${LIBRARY_DIR}/src/nfa/nfa_build_util.cpp" + "${LIBRARY_DIR}/src/nfa/rdfa.cpp" "${LIBRARY_DIR}/src/nfa/rdfa_graph.cpp" "${LIBRARY_DIR}/src/nfa/rdfa_merge.cpp" - "${LIBRARY_DIR}/src/nfa/rdfa.cpp" "${LIBRARY_DIR}/src/nfa/repeat.c" "${LIBRARY_DIR}/src/nfa/repeatcompile.cpp" "${LIBRARY_DIR}/src/nfa/sheng.c" "${LIBRARY_DIR}/src/nfa/shengcompile.cpp" - 
"${LIBRARY_DIR}/src/nfa/shufti.c" + "${LIBRARY_DIR}/src/nfa/shufti.cpp" "${LIBRARY_DIR}/src/nfa/shufticompile.cpp" "${LIBRARY_DIR}/src/nfa/tamarama.c" "${LIBRARY_DIR}/src/nfa/tamaramacompile.cpp" - "${LIBRARY_DIR}/src/nfa/truffle.c" + "${LIBRARY_DIR}/src/nfa/truffle.cpp" "${LIBRARY_DIR}/src/nfa/trufflecompile.cpp" + "${LIBRARY_DIR}/src/nfa/vermicelli_simd.cpp" + "${LIBRARY_DIR}/src/nfa/vermicellicompile.cpp" + + "${LIBRARY_DIR}/src/nfagraph/ng.cpp" "${LIBRARY_DIR}/src/nfagraph/ng_anchored_acyclic.cpp" "${LIBRARY_DIR}/src/nfagraph/ng_anchored_dots.cpp" "${LIBRARY_DIR}/src/nfagraph/ng_asserts.cpp" @@ -100,8 +115,8 @@ set (SRCS "${LIBRARY_DIR}/src/nfagraph/ng_holder.cpp" "${LIBRARY_DIR}/src/nfagraph/ng_is_equal.cpp" "${LIBRARY_DIR}/src/nfagraph/ng_lbr.cpp" - "${LIBRARY_DIR}/src/nfagraph/ng_limex_accel.cpp" "${LIBRARY_DIR}/src/nfagraph/ng_limex.cpp" + "${LIBRARY_DIR}/src/nfagraph/ng_limex_accel.cpp" "${LIBRARY_DIR}/src/nfagraph/ng_literal_analysis.cpp" "${LIBRARY_DIR}/src/nfagraph/ng_literal_component.cpp" "${LIBRARY_DIR}/src/nfagraph/ng_literal_decorated.cpp" @@ -112,17 +127,17 @@ set (SRCS "${LIBRARY_DIR}/src/nfagraph/ng_prune.cpp" "${LIBRARY_DIR}/src/nfagraph/ng_puff.cpp" "${LIBRARY_DIR}/src/nfagraph/ng_redundancy.cpp" - "${LIBRARY_DIR}/src/nfagraph/ng_region_redundancy.cpp" "${LIBRARY_DIR}/src/nfagraph/ng_region.cpp" + "${LIBRARY_DIR}/src/nfagraph/ng_region_redundancy.cpp" "${LIBRARY_DIR}/src/nfagraph/ng_repeat.cpp" "${LIBRARY_DIR}/src/nfagraph/ng_reports.cpp" "${LIBRARY_DIR}/src/nfagraph/ng_restructuring.cpp" "${LIBRARY_DIR}/src/nfagraph/ng_revacc.cpp" "${LIBRARY_DIR}/src/nfagraph/ng_sep.cpp" "${LIBRARY_DIR}/src/nfagraph/ng_small_literal_set.cpp" + "${LIBRARY_DIR}/src/nfagraph/ng_som.cpp" "${LIBRARY_DIR}/src/nfagraph/ng_som_add_redundancy.cpp" "${LIBRARY_DIR}/src/nfagraph/ng_som_util.cpp" - "${LIBRARY_DIR}/src/nfagraph/ng_som.cpp" "${LIBRARY_DIR}/src/nfagraph/ng_split.cpp" "${LIBRARY_DIR}/src/nfagraph/ng_squash.cpp" "${LIBRARY_DIR}/src/nfagraph/ng_stop.cpp" @@ 
-132,10 +147,8 @@ set (SRCS "${LIBRARY_DIR}/src/nfagraph/ng_vacuous.cpp" "${LIBRARY_DIR}/src/nfagraph/ng_violet.cpp" "${LIBRARY_DIR}/src/nfagraph/ng_width.cpp" - "${LIBRARY_DIR}/src/nfagraph/ng.cpp" + "${LIBRARY_DIR}/src/parser/AsciiComponentClass.cpp" - "${LIBRARY_DIR}/src/parser/buildstate.cpp" - "${LIBRARY_DIR}/src/parser/check_refs.cpp" "${LIBRARY_DIR}/src/parser/Component.cpp" "${LIBRARY_DIR}/src/parser/ComponentAlternation.cpp" "${LIBRARY_DIR}/src/parser/ComponentAssertion.cpp" @@ -145,31 +158,34 @@ set (SRCS "${LIBRARY_DIR}/src/parser/ComponentByte.cpp" "${LIBRARY_DIR}/src/parser/ComponentClass.cpp" "${LIBRARY_DIR}/src/parser/ComponentCondReference.cpp" - "${LIBRARY_DIR}/src/parser/ComponentEmpty.cpp" "${LIBRARY_DIR}/src/parser/ComponentEUS.cpp" + "${LIBRARY_DIR}/src/parser/ComponentEmpty.cpp" "${LIBRARY_DIR}/src/parser/ComponentRepeat.cpp" "${LIBRARY_DIR}/src/parser/ComponentSequence.cpp" "${LIBRARY_DIR}/src/parser/ComponentVisitor.cpp" "${LIBRARY_DIR}/src/parser/ComponentWordBoundary.cpp" "${LIBRARY_DIR}/src/parser/ConstComponentVisitor.cpp" - "${LIBRARY_DIR}/src/parser/control_verbs.cpp" + "${LIBRARY_DIR}/src/parser/Utf8ComponentClass.cpp" + "${LIBRARY_DIR}/src/parser/buildstate.cpp" + "${LIBRARY_DIR}/src/parser/buildstate.cpp" + "${LIBRARY_DIR}/src/parser/check_refs.cpp" + "${LIBRARY_DIR}/src/parser/check_refs.cpp" "${LIBRARY_DIR}/src/parser/logical_combination.cpp" "${LIBRARY_DIR}/src/parser/parse_error.cpp" "${LIBRARY_DIR}/src/parser/parser_util.cpp" - "${LIBRARY_DIR}/src/parser/Parser.cpp" "${LIBRARY_DIR}/src/parser/prefilter.cpp" "${LIBRARY_DIR}/src/parser/shortcut_literal.cpp" "${LIBRARY_DIR}/src/parser/ucp_table.cpp" "${LIBRARY_DIR}/src/parser/unsupported.cpp" "${LIBRARY_DIR}/src/parser/utf8_validate.cpp" - "${LIBRARY_DIR}/src/parser/Utf8ComponentClass.cpp" + "${LIBRARY_DIR}/src/rose/block.c" "${LIBRARY_DIR}/src/rose/catchup.c" "${LIBRARY_DIR}/src/rose/init.c" "${LIBRARY_DIR}/src/rose/match.c" "${LIBRARY_DIR}/src/rose/program_runtime.c" - 
"${LIBRARY_DIR}/src/rose/rose_build_add_mask.cpp" "${LIBRARY_DIR}/src/rose/rose_build_add.cpp" + "${LIBRARY_DIR}/src/rose/rose_build_add_mask.cpp" "${LIBRARY_DIR}/src/rose/rose_build_anchored.cpp" "${LIBRARY_DIR}/src/rose/rose_build_bytecode.cpp" "${LIBRARY_DIR}/src/rose/rose_build_castle.cpp" @@ -187,53 +203,95 @@ set (SRCS "${LIBRARY_DIR}/src/rose/rose_build_matchers.cpp" "${LIBRARY_DIR}/src/rose/rose_build_merge.cpp" "${LIBRARY_DIR}/src/rose/rose_build_misc.cpp" + "${LIBRARY_DIR}/src/rose/rose_build_misc.cpp" "${LIBRARY_DIR}/src/rose/rose_build_program.cpp" "${LIBRARY_DIR}/src/rose/rose_build_role_aliasing.cpp" "${LIBRARY_DIR}/src/rose/rose_build_scatter.cpp" "${LIBRARY_DIR}/src/rose/rose_build_width.cpp" "${LIBRARY_DIR}/src/rose/rose_in_util.cpp" "${LIBRARY_DIR}/src/rose/stream.c" - "${LIBRARY_DIR}/src/runtime.c" - "${LIBRARY_DIR}/src/scratch.c" + "${LIBRARY_DIR}/src/smallwrite/smallwrite_build.cpp" + "${LIBRARY_DIR}/src/som/slot_manager.cpp" "${LIBRARY_DIR}/src/som/som_runtime.c" "${LIBRARY_DIR}/src/som/som_stream.c" - "${LIBRARY_DIR}/src/stream_compress.c" + "${LIBRARY_DIR}/src/util/alloc.cpp" "${LIBRARY_DIR}/src/util/charreach.cpp" "${LIBRARY_DIR}/src/util/clique.cpp" "${LIBRARY_DIR}/src/util/compile_context.cpp" "${LIBRARY_DIR}/src/util/compile_error.cpp" - "${LIBRARY_DIR}/src/util/cpuid_flags.c" "${LIBRARY_DIR}/src/util/depth.cpp" "${LIBRARY_DIR}/src/util/fatbit_build.cpp" - "${LIBRARY_DIR}/src/util/multibit_build.cpp" "${LIBRARY_DIR}/src/util/multibit.c" + "${LIBRARY_DIR}/src/util/multibit_build.cpp" "${LIBRARY_DIR}/src/util/report_manager.cpp" - "${LIBRARY_DIR}/src/util/simd_utils.c" "${LIBRARY_DIR}/src/util/state_compress.c" "${LIBRARY_DIR}/src/util/target_info.cpp" "${LIBRARY_DIR}/src/util/ue2string.cpp" ) -add_library (_hyperscan ${SRCS}) +# The original build system invokes ragel on src/parser/{Parser|control_verbs}.rl (+ a few more .rl files which are unneeded). 
To avoid a +# build-time dependency on ragel (via contrib/ or find_program()), add the manually generated output of ragel to the sources. +# Please regenerate these files if you update vectorscan. +list (APPEND SRCS + "${LIBRARY_DIR}/../vectorscan-cmake/rageled_files/Parser.cpp" + "${LIBRARY_DIR}/../vectorscan-cmake/rageled_files/control_verbs.cpp" +) -target_compile_options (_hyperscan - PRIVATE -g0 # Library has too much debug information - -mno-avx -mno-avx2 # The library is using dynamic dispatch and is confused if AVX is enabled globally - -march=corei7 -O2 -fno-strict-aliasing -fno-omit-frame-pointer -fvisibility=hidden # The options from original build system - -fno-sanitize=undefined # Assume the library takes care of itself -) -target_include_directories (_hyperscan - PRIVATE - common - "${LIBRARY_DIR}/include" -) -target_include_directories (_hyperscan SYSTEM PUBLIC "${LIBRARY_DIR}/src") +# Platform-dependent files if (ARCH_AMD64) - target_include_directories (_hyperscan PRIVATE x86_64) -endif () -target_link_libraries (_hyperscan PRIVATE boost::headers_only) + list(APPEND SRCS + "${LIBRARY_DIR}/src/util/arch/x86/cpuid_flags.c" + "${LIBRARY_DIR}/src/util/arch/x86/masked_move.c" + "${LIBRARY_DIR}/src/util/supervector/arch/x86/impl.cpp" + ) +endif() -add_library (ch_contrib::hyperscan ALIAS _hyperscan) +if (ARCH_AARCH64) + list(APPEND SRCS + "${LIBRARY_DIR}/src/util/arch/arm/cpuid_flags.c" + "${LIBRARY_DIR}/src/util/supervector/arch/arm/impl.cpp" + ) +endif() + +# TODO +# if (ARCH_PPC64LE) +# list(APPEND SRCS +# "${LIBRARY_DIR}/src/util/supervector/arch/ppc64el/impl.cpp" +# ) +# endif() + +add_library (_vectorscan ${SRCS}) + +target_compile_options (_vectorscan PRIVATE + -g0 # library has too much debug information + -fno-sanitize=undefined # assume the library takes care of itself + -O2 -fno-strict-aliasing -fno-omit-frame-pointer -fvisibility=hidden # options from original build system +) + +# Include version header manually generated by running the 
original build system +target_include_directories (_vectorscan SYSTEM PRIVATE common) + +# vectorscan inherited some patched in-source versions of boost headers to fix a bug in +# boost 1.69. This bug has been solved long ago but vectorscan's source code still +# points to the patched versions, so include it here. +target_include_directories (_vectorscan SYSTEM PRIVATE "${LIBRARY_DIR}/include") + +target_include_directories (_vectorscan SYSTEM PUBLIC "${LIBRARY_DIR}/src") + +# Include platform-specific config header generated by manually running the original build system +# Please regenerate these files if you update vectorscan. + +if (ARCH_AMD64) + target_include_directories (_vectorscan PRIVATE x86_64) +endif () + +if (ARCH_AARCH64) + target_include_directories (_vectorscan PRIVATE aarch64) +endif () + +target_link_libraries (_vectorscan PRIVATE boost::headers_only) + +add_library (ch_contrib::vectorscan ALIAS _vectorscan) diff --git a/contrib/vectorscan-cmake/aarch64/config.h b/contrib/vectorscan-cmake/aarch64/config.h new file mode 100644 index 00000000000..78da1c8ad00 --- /dev/null +++ b/contrib/vectorscan-cmake/aarch64/config.h @@ -0,0 +1,142 @@ +/* used by cmake */ + +#ifndef CONFIG_H_ +#define CONFIG_H_ + +/* "Define if the build is 32 bit" */ +/* #undef ARCH_32_BIT */ + +/* "Define if the build is 64 bit" */ +#define ARCH_64_BIT + +/* "Define if building for IA32" */ +/* #undef ARCH_IA32 */ + +/* "Define if building for EM64T" */ +/* #undef ARCH_X86_64 */ + +/* "Define if building for ARM32" */ +/* #undef ARCH_ARM32 */ + +/* "Define if building for AARCH64" */ +#define ARCH_AARCH64 + +/* "Define if building for PPC64EL" */ +/* #undef ARCH_PPC64EL */ + +/* "Define if cross compiling for AARCH64" */ +/* #undef CROSS_COMPILE_AARCH64 */ + +/* Define if building SVE for AARCH64. */ +/* #undef BUILD_SVE */ + +/* Define if building SVE2 for AARCH64. */ +/* #undef BUILD_SVE2 */ + +/* Define if building SVE2+BITPERM for AARCH64. 
*/ +/* #undef BUILD_SVE2_BITPERM */ + +/* internal build, switch on dump support. */ +/* #undef DUMP_SUPPORT */ + +/* Define if building "fat" runtime. */ +/* #undef FAT_RUNTIME */ + +/* Define if building AVX2 in the fat runtime. */ +/* #undef BUILD_AVX2 */ + +/* Define if building AVX-512 in the fat runtime. */ +/* #undef BUILD_AVX512 */ + +/* Define if building AVX512VBMI in the fat runtime. */ +/* #undef BUILD_AVX512VBMI */ + +/* Define to 1 if `backtrace' works. */ +#define HAVE_BACKTRACE + +/* C compiler has __builtin_assume_aligned */ +#define HAVE_CC_BUILTIN_ASSUME_ALIGNED + +/* C++ compiler has __builtin_assume_aligned */ +#define HAVE_CXX_BUILTIN_ASSUME_ALIGNED + +/* C++ compiler has x86intrin.h */ +/* #undef HAVE_CXX_X86INTRIN_H */ + +/* C compiler has x86intrin.h */ +/* #undef HAVE_C_X86INTRIN_H */ + +/* C++ compiler has intrin.h */ +/* #undef HAVE_CXX_INTRIN_H */ + +/* C compiler has intrin.h */ +/* #undef HAVE_C_INTRIN_H */ + +/* C compiler has arm_neon.h */ +#define HAVE_C_ARM_NEON_H + +/* C compiler has arm_sve.h */ +/* #undef HAVE_C_ARM_SVE_H */ + +/* C compiler has arm_neon.h */ +/* #undef HAVE_C_PPC64EL_ALTIVEC_H */ + +/* Define to 1 if you have the declaration of `pthread_setaffinity_np', and to + 0 if you don't. */ +/* #undef HAVE_DECL_PTHREAD_SETAFFINITY_NP */ + +/* #undef HAVE_PTHREAD_NP_H */ + +/* Define to 1 if you have the `malloc_info' function. */ +/* #undef HAVE_MALLOC_INFO */ + +/* Define to 1 if you have the `memmem' function. */ +/* #undef HAVE_MEMMEM */ + +/* Define to 1 if you have a working `mmap' system call. */ +#define HAVE_MMAP + +/* Define to 1 if `posix_memalign' works. */ +#define HAVE_POSIX_MEMALIGN + +/* Define to 1 if you have the `setrlimit' function. */ +#define HAVE_SETRLIMIT + +/* Define to 1 if you have the `shmget' function. */ +/* #undef HAVE_SHMGET */ + +/* Define to 1 if you have the `sigaction' function. */ +#define HAVE_SIGACTION + +/* Define to 1 if you have the `sigaltstack' function. 
*/ +#define HAVE_SIGALTSTACK + +/* Define if the sqlite3_open_v2 call is available */ +/* #undef HAVE_SQLITE3_OPEN_V2 */ + +/* Define to 1 if you have the header file. */ +#define HAVE_UNISTD_H + +/* Define to 1 if you have the `_aligned_malloc' function. */ +/* #undef HAVE__ALIGNED_MALLOC */ + +/* Define if compiler has __builtin_constant_p */ +/* #undef HAVE__BUILTIN_CONSTANT_P */ + +/* Optimize, inline critical functions */ +#define HS_OPTIMIZE + +#define HS_VERSION +#define HS_MAJOR_VERSION +#define HS_MINOR_VERSION +#define HS_PATCH_VERSION + +#define BUILD_DATE + +/* define if this is a release build. */ +#define RELEASE_BUILD + +/* define if reverse_graph requires patch for boost 1.62.0 */ +/* #undef BOOST_REVGRAPH_PATCH */ + +#endif /* CONFIG_H_ */ diff --git a/contrib/hyperscan-cmake/common/hs_version.h b/contrib/vectorscan-cmake/common/hs_version.h similarity index 94% rename from contrib/hyperscan-cmake/common/hs_version.h rename to contrib/vectorscan-cmake/common/hs_version.h index f6fa8cb209f..8315b44fb2a 100644 --- a/contrib/hyperscan-cmake/common/hs_version.h +++ b/contrib/vectorscan-cmake/common/hs_version.h @@ -32,9 +32,8 @@ /** * A version string to identify this release of Hyperscan. 
*/ -#define HS_VERSION_STRING "5.1.1 2000-01-01" +#define HS_VERSION_STRING "5.4.7 2022-06-20" -#define HS_VERSION_32BIT ((5 << 24) | (1 << 16) | (1 << 8) | 0) +#define HS_VERSION_32BIT ((5 << 24) | (1 << 16) | (7 << 8) | 0) #endif /* HS_VERSION_H_C6428FAF8E3713 */ - diff --git a/contrib/vectorscan-cmake/rageled_files/Parser.cpp b/contrib/vectorscan-cmake/rageled_files/Parser.cpp new file mode 100644 index 00000000000..aebbd7ace1e --- /dev/null +++ b/contrib/vectorscan-cmake/rageled_files/Parser.cpp @@ -0,0 +1,5605 @@ + +#line 1 "Parser.rl" +/* + * Copyright (c) 2015-2017, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Parser code (generated with Ragel from Parser.rl). + */ + +#include "config.h" + +/* Parser.cpp is a built source, may not be in same dir as parser files */ +#include "parser/check_refs.h" +#include "parser/control_verbs.h" +#include "parser/ComponentAlternation.h" +#include "parser/ComponentAssertion.h" +#include "parser/ComponentAtomicGroup.h" +#include "parser/ComponentBackReference.h" +#include "parser/ComponentBoundary.h" +#include "parser/ComponentByte.h" +#include "parser/ComponentClass.h" +#include "parser/ComponentCondReference.h" +#include "parser/ComponentEmpty.h" +#include "parser/ComponentEUS.h" +#include "parser/Component.h" +#include "parser/ComponentRepeat.h" +#include "parser/ComponentSequence.h" +#include "parser/ComponentWordBoundary.h" +#include "parser/parse_error.h" +#include "parser/Parser.h" +#include "ue2common.h" +#include "util/compare.h" +#include "util/flat_containers.h" +#include "util/unicode_def.h" +#include "util/verify_types.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace std; + +namespace ue2 { + +#define PUSH_SEQUENCE do {\ + sequences.push_back(ExprState(currentSeq, (size_t)(ts - ptr), \ + mode)); \ + } while(0) +#define POP_SEQUENCE do {\ + currentSeq = sequences.back().seq; \ + mode = sequences.back().mode; \ + sequences.pop_back(); \ + } while(0) + +namespace { + +/** \brief Structure representing current state 
as we're parsing (current + * sequence, current options). Stored in the 'sequences' vector. */ +struct ExprState { + ExprState(ComponentSequence *seq_in, size_t offset, + const ParseMode &mode_in) : + seq(seq_in), seqOffset(offset), mode(mode_in) {} + + ComponentSequence *seq; //!< current sequence + size_t seqOffset; //!< offset seq was entered, for error reporting + ParseMode mode; //!< current mode flags +}; + +} // namespace + +static +unsigned parseAsDecimal(unsigned oct) { + // The input was parsed as octal, but should have been parsed as decimal. + // Deconstruct the octal number and reconstruct into decimal + unsigned ret = 0; + unsigned multiplier = 1; + while (oct) { + ret += (oct & 0x7) * multiplier; + oct >>= 3; + multiplier *= 10; + } + return ret; +} + +/** \brief Maximum value for a positive integer. We use INT_MAX, as that's what + * PCRE uses. */ +static constexpr u32 MAX_NUMBER = INT_MAX; + +static +void pushDec(u32 *acc, char raw_digit) { + assert(raw_digit >= '0' && raw_digit <= '9'); + u32 digit_val = raw_digit - '0'; + + // Ensure that we don't overflow. + u64a val = ((u64a)*acc * 10) + digit_val; + if (val > MAX_NUMBER) { + throw LocatedParseError("Number is too big"); + } + + *acc = verify_u32(val); +} + +static +void pushOct(u32 *acc, char raw_digit) { + assert(raw_digit >= '0' && raw_digit <= '7'); + u32 digit_val = raw_digit - '0'; + + // Ensure that we don't overflow. + u64a val = ((u64a)*acc * 8) + digit_val; + if (val > MAX_NUMBER) { + throw LocatedParseError("Number is too big"); + } + + *acc = verify_u32(val); +} + +static +void throwInvalidRepeat(void) { + throw LocatedParseError("Invalid repeat"); +} + +static +void throwInvalidUtf8(void) { + throw ParseError("Expression is not valid UTF-8."); +} + +/** + * Adds the given child component to the parent sequence, returning a pointer + * to the new (child) "current sequence". 
+ */ +static +ComponentSequence *enterSequence(ComponentSequence *parent, + unique_ptr child) { + assert(parent); + assert(child); + + ComponentSequence *seq = child.get(); + parent->addComponent(move(child)); + return seq; +} + +static +void addLiteral(ComponentSequence *currentSeq, char c, const ParseMode &mode) { + if (mode.utf8 && mode.caseless) { + /* leverage ComponentClass to generate the vertices */ + auto cc = getComponentClass(mode); + assert(cc); + cc->add(c); + cc->finalize(); + currentSeq->addComponent(move(cc)); + } else { + currentSeq->addComponent(getLiteralComponentClass(c, mode.caseless)); + } +} + +static +void addEscaped(ComponentSequence *currentSeq, unichar accum, + const ParseMode &mode, const char *err_msg) { + if (mode.utf8) { + /* leverage ComponentClass to generate the vertices */ + auto cc = getComponentClass(mode); + assert(cc); + cc->add(accum); + cc->finalize(); + currentSeq->addComponent(move(cc)); + } else { + if (accum > 255) { + throw LocatedParseError(err_msg); + } + addLiteral(currentSeq, (char)accum, mode); + } +} + +static +void addEscapedOctal(ComponentSequence *currentSeq, unichar accum, + const ParseMode &mode) { + addEscaped(currentSeq, accum, mode, "Octal value is greater than \\377"); +} + +static +void addEscapedHex(ComponentSequence *currentSeq, unichar accum, + const ParseMode &mode) { + addEscaped(currentSeq, accum, mode, + "Hexadecimal value is greater than \\xFF"); +} + +#define SLASH_C_ERROR "\\c must be followed by an ASCII character" + +static +u8 decodeCtrl(char raw) { + if (raw & 0x80) { + throw LocatedParseError(SLASH_C_ERROR); + } + return mytoupper(raw) ^ 0x40; +} + +static +unichar readUtf8CodePoint2c(const char *s) { + auto *ts = (const u8 *)s; + assert(ts[0] >= 0xc0 && ts[0] < 0xe0); + assert(ts[1] >= 0x80 && ts[1] < 0xc0); + unichar val = ts[0] & 0x1f; + val <<= 6; + val |= ts[1] & 0x3f; + DEBUG_PRINTF("utf8 %02hhx %02hhx ->\\x{%x}\n", ts[0], + ts[1], val); + return val; +} + +static +unichar 
readUtf8CodePoint3c(const char *s) { + auto *ts = (const u8 *)s; + assert(ts[0] >= 0xe0 && ts[0] < 0xf0); + assert(ts[1] >= 0x80 && ts[1] < 0xc0); + assert(ts[2] >= 0x80 && ts[2] < 0xc0); + unichar val = ts[0] & 0x0f; + val <<= 6; + val |= ts[1] & 0x3f; + val <<= 6; + val |= ts[2] & 0x3f; + DEBUG_PRINTF("utf8 %02hhx %02hhx %02hhx ->\\x{%x}\n", ts[0], + ts[1], ts[2], val); + return val; +} + +static +unichar readUtf8CodePoint4c(const char *s) { + auto *ts = (const u8 *)s; + assert(ts[0] >= 0xf0 && ts[0] < 0xf8); + assert(ts[1] >= 0x80 && ts[1] < 0xc0); + assert(ts[2] >= 0x80 && ts[2] < 0xc0); + assert(ts[3] >= 0x80 && ts[3] < 0xc0); + unichar val = ts[0] & 0x07; + val <<= 6; + val |= ts[1] & 0x3f; + val <<= 6; + val |= ts[2] & 0x3f; + val <<= 6; + val |= ts[3] & 0x3f; + DEBUG_PRINTF("utf8 %02hhx %02hhx %02hhx %02hhx ->\\x{%x}\n", ts[0], + ts[1], ts[2], ts[3], val); + return val; +} + + +#line 1909 "Parser.rl" + + + +#line 281 "Parser.cpp" +static const short _regex_actions[] = { + 0, 1, 0, 1, 1, 1, 2, 1, + 3, 1, 4, 1, 7, 1, 8, 1, + 9, 1, 10, 1, 11, 1, 12, 1, + 13, 1, 15, 1, 16, 1, 17, 1, + 18, 1, 19, 1, 20, 1, 21, 1, + 22, 1, 23, 1, 24, 1, 25, 1, + 26, 1, 27, 1, 28, 1, 29, 1, + 30, 1, 31, 1, 32, 1, 33, 1, + 34, 1, 35, 1, 36, 1, 37, 1, + 38, 1, 39, 1, 40, 1, 41, 1, + 42, 1, 43, 1, 44, 1, 45, 1, + 46, 1, 47, 1, 48, 1, 49, 1, + 50, 1, 51, 1, 52, 1, 53, 1, + 54, 1, 55, 1, 56, 1, 57, 1, + 58, 1, 59, 1, 60, 1, 61, 1, + 62, 1, 63, 1, 64, 1, 65, 1, + 66, 1, 67, 1, 68, 1, 69, 1, + 70, 1, 71, 1, 72, 1, 73, 1, + 74, 1, 75, 1, 76, 1, 77, 1, + 78, 1, 79, 1, 80, 1, 81, 1, + 82, 1, 83, 1, 84, 1, 85, 1, + 86, 1, 87, 1, 88, 1, 89, 1, + 90, 1, 91, 1, 92, 1, 93, 1, + 94, 1, 95, 1, 96, 1, 97, 1, + 98, 1, 99, 1, 100, 1, 101, 1, + 102, 1, 103, 1, 104, 1, 105, 1, + 106, 1, 107, 1, 108, 1, 109, 1, + 110, 1, 111, 1, 112, 1, 113, 1, + 114, 1, 115, 1, 116, 1, 117, 1, + 118, 1, 119, 1, 120, 1, 121, 1, + 122, 1, 123, 1, 124, 1, 125, 1, + 126, 1, 127, 1, 128, 1, 129, 1, + 130, 1, 131, 1, 132, 1, 
133, 1, + 134, 1, 135, 1, 136, 1, 137, 1, + 138, 1, 139, 1, 140, 1, 141, 1, + 142, 1, 143, 1, 144, 1, 145, 1, + 146, 1, 147, 1, 148, 1, 149, 1, + 150, 1, 151, 1, 152, 1, 153, 1, + 154, 1, 155, 1, 156, 1, 157, 1, + 158, 1, 159, 1, 160, 1, 161, 1, + 162, 1, 163, 1, 164, 1, 165, 1, + 166, 1, 167, 1, 168, 1, 169, 1, + 170, 1, 171, 1, 172, 1, 173, 1, + 174, 1, 175, 1, 176, 1, 177, 1, + 178, 1, 179, 1, 180, 1, 181, 1, + 182, 1, 183, 1, 184, 1, 185, 1, + 186, 1, 187, 1, 188, 1, 189, 1, + 190, 1, 191, 1, 192, 1, 193, 1, + 194, 1, 195, 1, 196, 1, 197, 1, + 198, 1, 199, 1, 200, 1, 201, 1, + 202, 1, 203, 1, 204, 1, 205, 1, + 206, 1, 207, 1, 208, 1, 209, 1, + 210, 1, 211, 1, 212, 1, 213, 1, + 214, 1, 215, 1, 216, 1, 217, 1, + 218, 1, 219, 1, 220, 1, 221, 1, + 222, 1, 223, 1, 224, 1, 225, 1, + 226, 1, 227, 1, 228, 1, 229, 1, + 230, 1, 231, 1, 232, 1, 233, 1, + 234, 1, 235, 1, 236, 1, 237, 1, + 240, 1, 242, 1, 243, 1, 244, 1, + 245, 1, 246, 1, 247, 1, 248, 1, + 249, 1, 250, 1, 251, 1, 252, 1, + 253, 1, 254, 1, 255, 1, 256, 1, + 257, 1, 258, 1, 259, 1, 260, 1, + 261, 1, 262, 1, 263, 1, 264, 1, + 265, 1, 266, 1, 267, 1, 268, 1, + 269, 1, 270, 1, 271, 1, 272, 1, + 273, 1, 274, 1, 275, 1, 276, 1, + 277, 1, 278, 1, 279, 1, 280, 1, + 281, 1, 282, 1, 283, 1, 284, 1, + 285, 1, 286, 1, 287, 1, 288, 1, + 289, 1, 290, 1, 291, 1, 292, 1, + 293, 1, 294, 1, 295, 1, 296, 1, + 297, 1, 298, 1, 299, 1, 300, 1, + 301, 1, 302, 1, 303, 1, 307, 1, + 308, 1, 309, 1, 310, 1, 311, 1, + 312, 1, 313, 1, 314, 1, 315, 1, + 316, 1, 317, 1, 318, 1, 319, 1, + 320, 1, 321, 1, 322, 1, 323, 1, + 324, 1, 325, 1, 326, 1, 327, 1, + 328, 1, 329, 1, 330, 1, 331, 1, + 332, 1, 333, 1, 334, 1, 335, 1, + 336, 1, 337, 1, 338, 1, 342, 1, + 343, 1, 344, 1, 345, 1, 346, 1, + 347, 1, 348, 1, 349, 1, 350, 1, + 352, 1, 353, 1, 354, 1, 355, 1, + 356, 1, 357, 1, 358, 1, 359, 1, + 360, 1, 361, 1, 362, 1, 363, 1, + 364, 1, 365, 1, 366, 1, 367, 1, + 368, 1, 369, 1, 370, 1, 371, 1, + 372, 1, 373, 1, 374, 1, 375, 1, + 376, 1, 377, 1, 
378, 1, 379, 1, + 380, 1, 381, 1, 382, 1, 383, 1, + 384, 1, 385, 1, 386, 1, 387, 1, + 388, 1, 389, 1, 390, 1, 391, 1, + 392, 1, 393, 1, 394, 1, 395, 1, + 396, 1, 397, 1, 398, 1, 399, 1, + 400, 1, 401, 1, 402, 1, 403, 1, + 404, 1, 405, 1, 406, 1, 407, 1, + 408, 1, 409, 1, 410, 1, 411, 1, + 412, 1, 413, 1, 414, 1, 415, 1, + 416, 1, 417, 1, 418, 1, 419, 1, + 420, 1, 421, 1, 422, 1, 423, 1, + 424, 1, 425, 1, 426, 1, 427, 1, + 428, 1, 429, 1, 430, 1, 431, 1, + 432, 1, 433, 1, 434, 1, 435, 1, + 436, 2, 3, 0, 2, 4, 5, 2, + 5, 1, 2, 9, 10, 2, 9, 238, + 2, 9, 239, 2, 9, 339, 2, 10, + 1, 2, 10, 340, 2, 10, 341, 2, + 11, 241, 2, 11, 351, 2, 12, 241, + 2, 12, 351, 2, 13, 241, 2, 13, + 351, 2, 14, 375, 2, 14, 376, 2, + 25, 0, 2, 25, 3, 2, 25, 6, + 2, 25, 14, 3, 25, 5, 306, 3, + 25, 10, 305, 3, 25, 14, 15, 4, + 25, 9, 304, 10 +}; + +static const char _regex_cond_offsets[] = { + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 1, + 2, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 
3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 4, 5, 6, 7, 8, 9, + 10, 11, 12, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, + 18, 19, 20, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 27, 28, 29, 31, 31, 36, + 36, 37, 38, 39, 44, 44, 45, 46, + 47, 47 +}; + +static const char _regex_cond_lengths[] = { + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 
1, 1, + 1, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 
0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 6, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 1, 1, 1, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 5, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 1, 1, 1, 2, 0, 5, 0, + 1, 1, 1, 5, 0, 1, 1, 1, + 0, 0 +}; + +static const short _regex_cond_keys[] = { + -128, -65, -128, -65, -128, -65, -128, -65, + -128, -65, -128, -65, -128, -65, -128, -65, + -128, -65, -128, -65, -128, -65, -128, -65, + -128, -65, -64, -33, -32, -17, -16, -9, + -8, -1, 35, 35, -128, -65, -128, -65, + -128, -65, -128, -65, -64, -33, -32, -17, + -16, -9, -8, -1, -128, -65, -128, -65, + -128, -65, 93, 93, 94, 94, -128, -65, + -64, -33, -32, -17, -16, -9, -8, -1, + -128, -65, -128, -65, -128, -65, -128, -65, + -64, -33, -32, -17, -16, -9, -8, -1, + -128, -65, -128, -65, -128, -65, 0 +}; + +static const char _regex_cond_spaces[] = { + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 1, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 2, 2, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0 +}; + +static const short _regex_key_offsets[] = { + 0, 0, 1, 23, 31, 39, 46, 54, + 55, 63, 71, 79, 86, 94, 97, 99, + 108, 115, 123, 131, 134, 140, 148, 151, + 158, 165, 173, 180, 184, 191, 194, 197, + 199, 202, 205, 207, 210, 213, 215, 216, + 218, 219, 227, 229, 232, 235, 236, 244, + 252, 260, 268, 275, 283, 290, 298, 305, + 313, 315, 318, 325, 329, 332, 335, 337, + 339, 341, 342, 344, 345, 347, 349, 350, + 351, 353, 354, 355, 356, 357, 358, 359, + 360, 361, 362, 363, 364, 365, 366, 369, + 370, 371, 372, 373, 374, 375, 376, 377, + 378, 379, 380, 381, 382, 383, 384, 385, + 386, 387, 388, 389, 390, 392, 393, 394, + 395, 396, 397, 399, 400, 401, 402, 403, + 404, 405, 
406, 408, 409, 410, 411, 412, + 413, 414, 415, 416, 417, 418, 419, 420, + 421, 422, 423, 424, 425, 426, 427, 429, + 430, 431, 432, 433, 434, 435, 436, 437, + 438, 439, 440, 441, 442, 443, 444, 445, + 446, 447, 448, 450, 451, 452, 453, 454, + 455, 456, 457, 458, 459, 461, 462, 463, + 464, 465, 466, 467, 468, 469, 470, 471, + 472, 473, 474, 475, 476, 477, 478, 479, + 480, 481, 482, 483, 484, 485, 486, 487, + 488, 489, 490, 491, 492, 493, 494, 495, + 496, 497, 498, 499, 500, 501, 502, 503, + 504, 505, 506, 507, 508, 509, 510, 511, + 512, 513, 514, 515, 516, 517, 519, 520, + 521, 522, 523, 524, 525, 526, 527, 528, + 529, 530, 531, 532, 533, 534, 535, 536, + 537, 538, 539, 540, 541, 542, 543, 544, + 545, 546, 547, 548, 549, 550, 551, 552, + 553, 554, 555, 556, 557, 558, 559, 561, + 562, 563, 564, 565, 566, 567, 568, 569, + 570, 571, 572, 573, 574, 575, 576, 577, + 578, 579, 580, 582, 583, 584, 585, 586, + 587, 588, 589, 590, 591, 592, 593, 594, + 595, 596, 597, 601, 602, 603, 604, 605, + 606, 607, 608, 609, 610, 611, 612, 613, + 614, 615, 616, 617, 618, 620, 621, 622, + 623, 624, 625, 626, 627, 628, 629, 631, + 632, 633, 634, 635, 636, 637, 640, 641, + 642, 643, 644, 645, 646, 647, 648, 650, + 651, 652, 653, 654, 655, 656, 658, 659, + 660, 661, 662, 663, 664, 665, 666, 667, + 668, 669, 670, 671, 672, 673, 674, 675, + 676, 677, 678, 679, 680, 681, 682, 683, + 684, 685, 686, 687, 688, 689, 690, 691, + 692, 693, 694, 695, 696, 697, 698, 699, + 700, 701, 702, 704, 705, 706, 707, 708, + 709, 710, 714, 715, 716, 717, 718, 719, + 720, 721, 722, 723, 724, 725, 726, 727, + 728, 729, 730, 731, 732, 733, 734, 735, + 736, 737, 738, 739, 740, 741, 742, 743, + 744, 745, 746, 747, 748, 749, 750, 752, + 753, 754, 755, 756, 757, 758, 759, 760, + 761, 762, 763, 764, 765, 766, 767, 768, + 769, 770, 771, 773, 774, 775, 776, 777, + 778, 779, 780, 781, 782, 783, 784, 785, + 786, 787, 788, 789, 790, 791, 792, 793, + 794, 795, 796, 797, 798, 799, 800, 801, + 802, 803, 805, 806, 807, 808, 809, 
810, + 811, 812, 813, 814, 815, 816, 817, 820, + 822, 823, 824, 825, 826, 827, 828, 829, + 830, 833, 834, 835, 836, 837, 838, 839, + 840, 841, 842, 843, 844, 845, 846, 847, + 849, 850, 851, 853, 854, 855, 856, 857, + 858, 859, 860, 861, 862, 863, 864, 865, + 866, 867, 868, 869, 870, 871, 872, 873, + 874, 875, 876, 877, 880, 883, 885, 900, + 903, 906, 908, 922, 927, 932, 936, 940, + 943, 946, 950, 954, 957, 960, 964, 968, + 972, 975, 978, 982, 986, 990, 994, 997, + 1000, 1004, 1008, 1012, 1016, 1019, 1022, 1026, + 1030, 1034, 1038, 1041, 1044, 1048, 1052, 1056, + 1060, 1063, 1066, 1070, 1074, 1078, 1082, 1085, + 1088, 1093, 1097, 1101, 1105, 1108, 1111, 1115, + 1119, 1123, 1126, 1129, 1133, 1137, 1141, 1145, + 1148, 1151, 1155, 1159, 1163, 1167, 1170, 1173, + 1177, 1181, 1185, 1188, 1191, 1195, 1199, 1203, + 1207, 1211, 1214, 1217, 1222, 1227, 1231, 1235, + 1238, 1241, 1245, 1249, 1252, 1255, 1259, 1263, + 1267, 1270, 1273, 1277, 1281, 1285, 1289, 1292, + 1295, 1299, 1303, 1307, 1311, 1314, 1317, 1321, + 1325, 1329, 1333, 1336, 1339, 1343, 1347, 1351, + 1355, 1358, 1361, 1365, 1369, 1373, 1377, 1380, + 1383, 1388, 1392, 1396, 1400, 1403, 1406, 1410, + 1414, 1418, 1421, 1424, 1428, 1432, 1436, 1440, + 1443, 1446, 1450, 1454, 1458, 1462, 1465, 1468, + 1472, 1476, 1480, 1483, 1486, 1490, 1494, 1498, + 1502, 1506, 1509, 1512, 1515, 1518, 1520, 1522, + 1525, 1532, 1534, 1536, 1538, 1540, 1542, 1544, + 1546, 1548, 1550, 1584, 1586, 1593, 1600, 1614, + 1616, 1622, 1625, 1634, 1635, 1638, 1641, 1648, + 1650, 1652, 1654, 1657, 1702, 1704, 1706, 1710, + 1714, 1716, 1717, 1717, 1723, 1725, 1727, 1729, + 1731, 1734, 1735, 1736, 1743, 1749, 1755, 1757, + 1759, 1761, 1763, 1765, 1767, 1768, 1771, 1794, + 1797, 1802, 1811, 1813, 1814, 1816, 1821, 1824, + 1826, 1828, 1829, 1831, 1841, 1847, 1848, 1853, + 1857, 1865, 1867, 1876, 1880, 1881, 1882, 1886, + 1887, 1890, 1890, 1897, 1913, 1916, 1955, 1957, + 1959, 1961, 1963, 1964, 1964, 1965, 1966, 1973, + 1979, 1985, 1987, 1989, 1991, 
2000, 2002, 2015, + 2016, 2018, 2020, 2022, 2035, 2036, 2038, 2040, + 2042, 2043 +}; + +static const short _regex_trans_keys[] = { + 41, 33, 35, 38, 39, 40, 41, 43, + 45, 58, 60, 61, 62, 63, 67, 80, + 105, 109, 115, 120, 123, 48, 57, 41, + 95, 48, 57, 65, 90, 97, 122, 39, + 95, 48, 57, 65, 90, 97, 122, 95, + 48, 57, 65, 90, 97, 122, 39, 95, + 48, 57, 65, 90, 97, 122, 41, 41, + 95, 48, 57, 65, 90, 97, 122, 41, + 95, 48, 57, 65, 90, 97, 122, 41, + 95, 48, 57, 65, 90, 97, 122, 95, + 48, 57, 65, 90, 97, 122, 62, 95, + 48, 57, 65, 90, 97, 122, 33, 60, + 61, 33, 61, 38, 41, 95, 48, 57, + 65, 90, 97, 122, 95, 48, 57, 65, + 90, 97, 122, 41, 95, 48, 57, 65, + 90, 97, 122, 41, 95, 48, 57, 65, + 90, 97, 122, 41, 48, 57, 41, 58, + 105, 109, 115, 120, 62, 95, 48, 57, + 65, 90, 97, 122, 41, 48, 57, 95, + 48, 57, 65, 90, 97, 122, 95, 48, + 57, 65, 90, 97, 122, 41, 95, 48, + 57, 65, 90, 97, 122, 95, 48, 57, + 65, 90, 97, 122, 105, 109, 115, 120, + 41, 45, 58, 105, 109, 115, 120, 46, + 92, 93, 46, 92, 93, 46, 92, 58, + 92, 93, 58, 92, 93, 58, 92, 61, + 92, 93, 61, 92, 93, 61, 92, 39, + 48, 57, 62, 45, 95, 48, 57, 65, + 90, 97, 122, 48, 57, 125, 48, 57, + 125, 48, 57, 125, 95, 125, 48, 57, + 65, 90, 97, 122, 95, 125, 48, 57, + 65, 90, 97, 122, 95, 125, 48, 57, + 65, 90, 97, 122, 95, 125, 48, 57, + 65, 90, 97, 122, 95, 48, 57, 65, + 90, 97, 122, 39, 95, 48, 57, 65, + 90, 97, 122, 95, 48, 57, 65, 90, + 97, 122, 62, 95, 48, 57, 65, 90, + 97, 122, 95, 48, 57, 65, 90, 97, + 122, 95, 125, 48, 57, 65, 90, 97, + 122, 48, 55, 125, 48, 55, 125, 48, + 57, 65, 70, 97, 102, 44, 125, 48, + 57, 125, 48, 57, 125, 48, 57, 384, + 447, 384, 447, 384, 447, 41, 41, 80, + 41, 41, 70, 41, 56, 41, 121, 97, + 109, 98, 105, 99, 101, 110, 105, 97, + 110, 101, 115, 116, 97, 110, 108, 109, + 116, 105, 110, 101, 115, 101, 117, 109, + 97, 107, 110, 103, 97, 108, 105, 112, + 111, 109, 111, 102, 111, 97, 104, 105, + 109, 105, 108, 108, 101, 103, 104, 105, + 110, 101, 115, 101, 105, 100, 110, 114, + 97, 100, 105, 
97, 110, 95, 65, 98, + 111, 114, 105, 103, 105, 110, 97, 108, + 105, 97, 110, 97, 101, 109, 114, 111, + 107, 101, 101, 109, 111, 110, 116, 105, + 99, 110, 101, 105, 102, 111, 114, 109, + 112, 114, 114, 105, 111, 116, 105, 108, + 108, 105, 99, 115, 118, 101, 114, 101, + 116, 97, 110, 97, 103, 97, 114, 105, + 121, 112, 116, 105, 97, 110, 95, 72, + 105, 101, 114, 111, 103, 108, 121, 112, + 104, 115, 104, 105, 111, 112, 105, 99, + 111, 114, 103, 105, 97, 110, 97, 103, + 111, 108, 105, 116, 105, 99, 116, 104, + 105, 99, 101, 101, 107, 106, 114, 97, + 114, 97, 116, 105, 109, 117, 107, 104, + 105, 110, 117, 108, 110, 111, 111, 98, + 114, 101, 119, 114, 97, 103, 97, 110, + 97, 112, 101, 114, 105, 97, 108, 95, + 65, 114, 97, 109, 97, 105, 99, 104, + 115, 101, 114, 105, 116, 101, 100, 99, + 114, 105, 112, 116, 105, 111, 110, 97, + 108, 95, 80, 97, 104, 114, 108, 97, + 118, 105, 116, 104, 105, 97, 110, 118, + 97, 110, 101, 115, 101, 105, 110, 116, + 121, 116, 104, 105, 110, 97, 100, 97, + 97, 107, 97, 110, 97, 97, 104, 95, + 76, 105, 97, 109, 114, 111, 115, 104, + 116, 104, 105, 101, 114, 111, 116, 105, + 110, 112, 99, 104, 97, 109, 110, 115, + 98, 117, 101, 97, 114, 95, 66, 117, + 99, 100, 105, 97, 110, 105, 97, 110, + 108, 110, 97, 121, 97, 108, 97, 109, + 100, 97, 105, 99, 116, 101, 105, 95, + 77, 97, 121, 101, 107, 110, 103, 111, + 108, 105, 97, 110, 97, 110, 109, 97, + 114, 119, 95, 84, 97, 105, 95, 76, + 117, 101, 111, 104, 97, 109, 95, 100, + 67, 104, 105, 107, 105, 95, 73, 80, + 83, 84, 116, 97, 108, 105, 99, 101, + 114, 115, 105, 97, 110, 111, 117, 116, + 104, 95, 65, 114, 97, 98, 105, 97, + 110, 117, 114, 107, 105, 99, 105, 121, + 97, 109, 97, 110, 121, 97, 97, 111, + 103, 115, 95, 80, 97, 101, 110, 105, + 99, 105, 97, 110, 106, 97, 110, 103, + 110, 105, 99, 109, 117, 97, 114, 105, + 116, 97, 110, 114, 97, 115, 104, 116, + 114, 97, 97, 118, 105, 97, 110, 110, + 104, 97, 108, 97, 110, 100, 97, 110, + 101, 115, 101, 108, 114, 111, 116, 105, + 95, 78, 97, 103, 114, 
105, 105, 97, + 99, 103, 105, 109, 97, 98, 108, 111, + 103, 97, 110, 119, 97, 95, 76, 84, + 86, 101, 104, 97, 109, 105, 101, 116, + 105, 108, 108, 117, 103, 117, 97, 97, + 105, 110, 97, 98, 102, 101, 116, 97, + 110, 105, 110, 97, 103, 104, 97, 114, + 105, 116, 105, 99, 105, 110, 115, 112, + 100, 123, 94, 125, 94, 46, 92, 93, + 46, 92, 93, 46, 92, 58, 92, 93, + 94, 97, 98, 99, 100, 103, 108, 112, + 115, 117, 119, 120, 58, 92, 93, 58, + 92, 93, 58, 92, 58, 92, 93, 97, + 98, 99, 100, 103, 108, 112, 115, 117, + 119, 120, 58, 92, 93, 108, 115, 58, + 92, 93, 110, 112, 58, 92, 93, 117, + 58, 92, 93, 109, 58, 92, 93, 58, + 92, 93, 58, 92, 93, 104, 58, 92, + 93, 97, 58, 92, 93, 58, 92, 93, + 58, 92, 93, 99, 58, 92, 93, 105, + 58, 92, 93, 105, 58, 92, 93, 58, + 92, 93, 58, 92, 93, 108, 58, 92, + 93, 97, 58, 92, 93, 110, 58, 92, + 93, 107, 58, 92, 93, 58, 92, 93, + 58, 92, 93, 110, 58, 92, 93, 116, + 58, 92, 93, 114, 58, 92, 93, 108, + 58, 92, 93, 58, 92, 93, 58, 92, + 93, 105, 58, 92, 93, 103, 58, 92, + 93, 105, 58, 92, 93, 116, 58, 92, + 93, 58, 92, 93, 58, 92, 93, 114, + 58, 92, 93, 97, 58, 92, 93, 112, + 58, 92, 93, 104, 58, 92, 93, 58, + 92, 93, 58, 92, 93, 111, 58, 92, + 93, 119, 58, 92, 93, 101, 58, 92, + 93, 114, 58, 92, 93, 58, 92, 93, + 58, 92, 93, 114, 117, 58, 92, 93, + 105, 58, 92, 93, 110, 58, 92, 93, + 116, 58, 92, 93, 58, 92, 93, 58, + 92, 93, 110, 58, 92, 93, 99, 58, + 92, 93, 116, 58, 92, 93, 58, 92, + 93, 58, 92, 93, 112, 58, 92, 93, + 97, 58, 92, 93, 99, 58, 92, 93, + 101, 58, 92, 93, 58, 92, 93, 58, + 92, 93, 112, 58, 92, 93, 112, 58, + 92, 93, 101, 58, 92, 93, 114, 58, + 92, 93, 58, 92, 93, 58, 92, 93, + 111, 58, 92, 93, 114, 58, 92, 93, + 100, 58, 92, 93, 58, 92, 93, 58, + 92, 93, 100, 58, 92, 93, 105, 58, + 92, 93, 103, 58, 92, 93, 105, 58, + 92, 93, 116, 58, 92, 93, 58, 92, + 93, 58, 92, 93, 108, 115, 58, 92, + 93, 110, 112, 58, 92, 93, 117, 58, + 92, 93, 109, 58, 92, 93, 58, 92, + 93, 58, 92, 93, 104, 58, 92, 93, + 97, 58, 92, 93, 58, 92, 93, 58, + 
92, 93, 99, 58, 92, 93, 105, 58, + 92, 93, 105, 58, 92, 93, 58, 92, + 93, 58, 92, 93, 108, 58, 92, 93, + 97, 58, 92, 93, 110, 58, 92, 93, + 107, 58, 92, 93, 58, 92, 93, 58, + 92, 93, 110, 58, 92, 93, 116, 58, + 92, 93, 114, 58, 92, 93, 108, 58, + 92, 93, 58, 92, 93, 58, 92, 93, + 105, 58, 92, 93, 103, 58, 92, 93, + 105, 58, 92, 93, 116, 58, 92, 93, + 58, 92, 93, 58, 92, 93, 114, 58, + 92, 93, 97, 58, 92, 93, 112, 58, + 92, 93, 104, 58, 92, 93, 58, 92, + 93, 58, 92, 93, 111, 58, 92, 93, + 119, 58, 92, 93, 101, 58, 92, 93, + 114, 58, 92, 93, 58, 92, 93, 58, + 92, 93, 114, 117, 58, 92, 93, 105, + 58, 92, 93, 110, 58, 92, 93, 116, + 58, 92, 93, 58, 92, 93, 58, 92, + 93, 110, 58, 92, 93, 99, 58, 92, + 93, 116, 58, 92, 93, 58, 92, 93, + 58, 92, 93, 112, 58, 92, 93, 97, + 58, 92, 93, 99, 58, 92, 93, 101, + 58, 92, 93, 58, 92, 93, 58, 92, + 93, 112, 58, 92, 93, 112, 58, 92, + 93, 101, 58, 92, 93, 114, 58, 92, + 93, 58, 92, 93, 58, 92, 93, 111, + 58, 92, 93, 114, 58, 92, 93, 100, + 58, 92, 93, 58, 92, 93, 58, 92, + 93, 100, 58, 92, 93, 105, 58, 92, + 93, 103, 58, 92, 93, 105, 58, 92, + 93, 116, 58, 92, 93, 58, 92, 93, + 61, 92, 93, 61, 92, 93, 61, 92, + 48, 55, 125, 48, 55, 125, 48, 57, + 65, 70, 97, 102, 384, 447, 384, 447, + 384, 447, 384, 447, 384, 447, 384, 447, + 384, 447, 384, 447, 384, 447, 0, 32, + 36, 40, 41, 42, 43, 46, 63, 91, + 92, 94, 123, 124, 1315, 1571, 1, 8, + 9, 13, 14, 34, 37, 255, 384, 447, + 448, 479, 480, 495, 496, 503, 504, 511, + 42, 63, 95, 48, 57, 65, 90, 97, + 122, 95, 48, 57, 65, 90, 97, 122, + 39, 48, 60, 63, 82, 95, 49, 55, + 56, 57, 65, 90, 97, 122, 48, 57, + 105, 109, 115, 120, 48, 57, 41, 48, + 57, 33, 61, 95, 48, 57, 65, 90, + 97, 122, 123, 41, 48, 57, 60, 61, + 62, 41, 45, 58, 105, 109, 115, 120, + 43, 63, 43, 63, 43, 63, 46, 58, + 61, 48, 65, 66, 67, 68, 69, 71, + 72, 75, 76, 78, 80, 81, 82, 83, + 85, 86, 87, 88, 90, 97, 98, 99, + 100, 101, 102, 103, 104, 107, 108, 110, + 111, 112, 114, 115, 116, 117, 118, 119, + 120, 122, 49, 55, 56, 57, 
48, 55, + 48, 55, 48, 55, 56, 57, 48, 55, + 56, 57, 48, 57, 123, 39, 45, 60, + 123, 48, 57, 48, 57, 48, 57, 48, + 57, 48, 57, 39, 60, 123, 123, 123, + 123, 48, 57, 65, 70, 97, 102, 48, + 57, 65, 70, 97, 102, 48, 57, 65, + 70, 97, 102, 48, 57, 43, 63, 384, + 447, 384, 447, 384, 447, 41, 85, 41, + 41, 67, 84, 65, 66, 67, 68, 69, + 71, 72, 73, 74, 75, 76, 77, 78, + 79, 80, 82, 83, 84, 85, 86, 88, + 89, 90, 110, 114, 118, 97, 101, 111, + 114, 117, 97, 99, 102, 104, 110, 111, + 115, 117, 121, 109, 112, 101, 103, 116, + 101, 108, 111, 114, 117, 97, 101, 105, + 103, 117, 109, 110, 97, 97, 104, 38, + 97, 101, 105, 108, 109, 111, 116, 117, + 121, 97, 99, 101, 110, 111, 121, 101, + 100, 101, 107, 108, 111, 103, 108, 114, + 115, 99, 100, 101, 102, 104, 105, 111, + 115, 101, 117, 97, 99, 104, 105, 107, + 109, 111, 117, 121, 97, 101, 104, 105, + 103, 97, 97, 112, 115, 119, 105, 108, + 112, 115, 67, 76, 77, 78, 80, 83, + 90, 45, 91, 92, 93, 0, 255, 384, + 447, 448, 479, 480, 495, 496, 503, 504, + 511, 46, 58, 61, 48, 68, 69, 72, + 76, 78, 80, 81, 83, 85, 86, 87, + 97, 98, 99, 100, 101, 102, 103, 104, + 108, 110, 111, 112, 114, 115, 116, 117, + 118, 119, 120, 49, 55, 56, 57, 65, + 90, 105, 122, 48, 55, 48, 55, 48, + 55, 48, 55, 123, 123, 123, 123, 48, + 57, 65, 70, 97, 102, 48, 57, 65, + 70, 97, 102, 48, 57, 65, 70, 97, + 102, 384, 447, 384, 447, 384, 447, 92, + 1117, 1118, -128, 91, 95, 127, 861, 862, + 69, 81, 92, 0, 255, 384, 447, 448, + 479, 480, 495, 496, 503, 504, 511, 69, + 384, 447, 384, 447, 384, 447, 92, 0, + 255, 384, 447, 448, 479, 480, 495, 496, + 503, 504, 511, 69, 384, 447, 384, 447, + 384, 447, 41, 10, 0 +}; + +static const char _regex_single_lengths[] = { + 0, 1, 20, 2, 2, 1, 2, 1, + 2, 2, 2, 1, 2, 3, 2, 3, + 1, 2, 2, 1, 6, 2, 1, 1, + 1, 2, 1, 4, 7, 3, 3, 2, + 3, 3, 2, 3, 3, 2, 1, 0, + 1, 2, 0, 1, 1, 1, 2, 2, + 2, 2, 1, 2, 1, 2, 1, 2, + 0, 1, 1, 2, 1, 1, 0, 0, + 0, 1, 2, 1, 2, 2, 1, 1, + 2, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 3, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 
+ 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 2, 1, 1, 1, + 1, 1, 2, 1, 1, 1, 1, 1, + 1, 1, 2, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 2, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 2, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 2, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 2, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 2, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 2, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 4, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 2, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 2, 1, + 1, 1, 1, 1, 1, 3, 1, 1, + 1, 1, 1, 1, 1, 1, 2, 1, + 1, 1, 1, 1, 1, 2, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 2, 1, 1, 1, 1, 1, + 1, 4, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 2, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 2, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 2, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 3, 2, + 1, 1, 1, 1, 1, 1, 1, 1, + 3, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 2, + 1, 1, 2, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 3, 3, 2, 15, 3, + 3, 2, 14, 5, 5, 4, 4, 3, + 3, 4, 4, 3, 3, 4, 4, 4, + 3, 3, 4, 4, 4, 4, 3, 3, + 4, 4, 4, 4, 3, 3, 4, 4, + 4, 4, 3, 3, 4, 4, 4, 4, + 3, 3, 4, 4, 4, 4, 3, 3, + 5, 4, 4, 4, 3, 3, 4, 4, + 4, 3, 3, 4, 4, 4, 4, 3, + 3, 4, 4, 4, 4, 3, 3, 4, + 4, 4, 3, 3, 4, 4, 4, 4, + 4, 3, 3, 5, 5, 4, 4, 3, + 3, 4, 4, 3, 3, 4, 4, 4, + 3, 3, 4, 4, 4, 4, 3, 3, + 4, 4, 4, 4, 3, 3, 4, 4, + 4, 4, 3, 3, 4, 4, 4, 4, + 3, 3, 4, 4, 4, 4, 3, 3, + 5, 4, 4, 4, 3, 3, 4, 4, + 4, 3, 3, 4, 4, 4, 
4, 3, + 3, 4, 4, 4, 4, 3, 3, 4, + 4, 4, 3, 3, 4, 4, 4, 4, + 4, 3, 3, 3, 3, 2, 0, 1, + 1, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 16, 2, 1, 1, 6, 0, + 4, 1, 3, 1, 1, 3, 7, 2, + 2, 2, 3, 41, 0, 0, 0, 0, + 0, 1, 0, 4, 0, 0, 0, 0, + 3, 1, 1, 1, 0, 0, 0, 2, + 0, 0, 0, 2, 1, 3, 23, 3, + 5, 9, 2, 1, 2, 5, 3, 2, + 2, 1, 2, 10, 6, 1, 5, 4, + 8, 2, 9, 4, 1, 1, 4, 1, + 3, 0, 7, 4, 3, 31, 0, 0, + 0, 0, 1, 0, 1, 1, 1, 0, + 0, 0, 0, 0, 3, 2, 1, 1, + 0, 0, 0, 1, 1, 0, 0, 0, + 1, 1 +}; + +static const char _regex_range_lengths[] = { + 0, 0, 1, 3, 3, 3, 3, 0, + 3, 3, 3, 3, 3, 0, 0, 3, + 3, 3, 3, 1, 0, 3, 1, 3, + 3, 3, 3, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 1, + 0, 3, 1, 1, 1, 0, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 1, 1, 3, 1, 1, 1, 1, 1, + 1, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 
0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 1, 1, + 3, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 9, 0, 3, 3, 4, 1, + 1, 1, 3, 0, 1, 0, 0, 0, + 0, 0, 0, 2, 1, 1, 2, 2, + 1, 0, 0, 1, 1, 1, 1, 1, + 0, 0, 0, 3, 3, 3, 1, 0, + 1, 1, 1, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 6, 0, 4, 1, 1, + 1, 1, 0, 0, 0, 0, 3, 3, + 3, 1, 1, 1, 3, 0, 6, 0, + 1, 1, 1, 6, 0, 1, 1, 1, + 0, 0 +}; + +static const short _regex_index_offsets[] = { + 0, 0, 2, 24, 30, 36, 41, 47, + 49, 55, 61, 67, 72, 78, 82, 85, + 92, 97, 103, 109, 112, 119, 125, 128, + 133, 138, 144, 149, 154, 162, 166, 170, + 173, 177, 181, 184, 188, 192, 195, 197, + 199, 201, 207, 209, 212, 215, 217, 223, + 229, 235, 241, 246, 252, 257, 263, 268, + 274, 276, 279, 284, 288, 291, 294, 296, + 298, 300, 302, 305, 307, 310, 313, 315, + 317, 320, 322, 324, 326, 328, 330, 332, + 334, 336, 338, 340, 342, 344, 346, 350, + 352, 354, 356, 358, 360, 362, 364, 366, + 368, 370, 372, 374, 376, 378, 380, 382, + 384, 386, 388, 390, 392, 395, 397, 399, + 401, 403, 405, 408, 410, 412, 414, 416, + 418, 420, 422, 425, 427, 429, 431, 433, + 435, 437, 439, 
441, 443, 445, 447, 449, + 451, 453, 455, 457, 459, 461, 463, 466, + 468, 470, 472, 474, 476, 478, 480, 482, + 484, 486, 488, 490, 492, 494, 496, 498, + 500, 502, 504, 507, 509, 511, 513, 515, + 517, 519, 521, 523, 525, 528, 530, 532, + 534, 536, 538, 540, 542, 544, 546, 548, + 550, 552, 554, 556, 558, 560, 562, 564, + 566, 568, 570, 572, 574, 576, 578, 580, + 582, 584, 586, 588, 590, 592, 594, 596, + 598, 600, 602, 604, 606, 608, 610, 612, + 614, 616, 618, 620, 622, 624, 626, 628, + 630, 632, 634, 636, 638, 640, 643, 645, + 647, 649, 651, 653, 655, 657, 659, 661, + 663, 665, 667, 669, 671, 673, 675, 677, + 679, 681, 683, 685, 687, 689, 691, 693, + 695, 697, 699, 701, 703, 705, 707, 709, + 711, 713, 715, 717, 719, 721, 723, 726, + 728, 730, 732, 734, 736, 738, 740, 742, + 744, 746, 748, 750, 752, 754, 756, 758, + 760, 762, 764, 767, 769, 771, 773, 775, + 777, 779, 781, 783, 785, 787, 789, 791, + 793, 795, 797, 802, 804, 806, 808, 810, + 812, 814, 816, 818, 820, 822, 824, 826, + 828, 830, 832, 834, 836, 839, 841, 843, + 845, 847, 849, 851, 853, 855, 857, 860, + 862, 864, 866, 868, 870, 872, 876, 878, + 880, 882, 884, 886, 888, 890, 892, 895, + 897, 899, 901, 903, 905, 907, 910, 912, + 914, 916, 918, 920, 922, 924, 926, 928, + 930, 932, 934, 936, 938, 940, 942, 944, + 946, 948, 950, 952, 954, 956, 958, 960, + 962, 964, 966, 968, 970, 972, 974, 976, + 978, 980, 982, 984, 986, 988, 990, 992, + 994, 996, 998, 1001, 1003, 1005, 1007, 1009, + 1011, 1013, 1018, 1020, 1022, 1024, 1026, 1028, + 1030, 1032, 1034, 1036, 1038, 1040, 1042, 1044, + 1046, 1048, 1050, 1052, 1054, 1056, 1058, 1060, + 1062, 1064, 1066, 1068, 1070, 1072, 1074, 1076, + 1078, 1080, 1082, 1084, 1086, 1088, 1090, 1093, + 1095, 1097, 1099, 1101, 1103, 1105, 1107, 1109, + 1111, 1113, 1115, 1117, 1119, 1121, 1123, 1125, + 1127, 1129, 1131, 1134, 1136, 1138, 1140, 1142, + 1144, 1146, 1148, 1150, 1152, 1154, 1156, 1158, + 1160, 1162, 1164, 1166, 1168, 1170, 1172, 1174, + 1176, 1178, 1180, 1182, 1184, 1186, 
1188, 1190, + 1192, 1194, 1197, 1199, 1201, 1203, 1205, 1207, + 1209, 1211, 1213, 1215, 1217, 1219, 1221, 1225, + 1228, 1230, 1232, 1234, 1236, 1238, 1240, 1242, + 1244, 1248, 1250, 1252, 1254, 1256, 1258, 1260, + 1262, 1264, 1266, 1268, 1270, 1272, 1274, 1276, + 1279, 1281, 1283, 1286, 1288, 1290, 1292, 1294, + 1296, 1298, 1300, 1302, 1304, 1306, 1308, 1310, + 1312, 1314, 1316, 1318, 1320, 1322, 1324, 1326, + 1328, 1330, 1332, 1334, 1338, 1342, 1345, 1361, + 1365, 1369, 1372, 1387, 1393, 1399, 1404, 1409, + 1413, 1417, 1422, 1427, 1431, 1435, 1440, 1445, + 1450, 1454, 1458, 1463, 1468, 1473, 1478, 1482, + 1486, 1491, 1496, 1501, 1506, 1510, 1514, 1519, + 1524, 1529, 1534, 1538, 1542, 1547, 1552, 1557, + 1562, 1566, 1570, 1575, 1580, 1585, 1590, 1594, + 1598, 1604, 1609, 1614, 1619, 1623, 1627, 1632, + 1637, 1642, 1646, 1650, 1655, 1660, 1665, 1670, + 1674, 1678, 1683, 1688, 1693, 1698, 1702, 1706, + 1711, 1716, 1721, 1725, 1729, 1734, 1739, 1744, + 1749, 1754, 1758, 1762, 1768, 1774, 1779, 1784, + 1788, 1792, 1797, 1802, 1806, 1810, 1815, 1820, + 1825, 1829, 1833, 1838, 1843, 1848, 1853, 1857, + 1861, 1866, 1871, 1876, 1881, 1885, 1889, 1894, + 1899, 1904, 1909, 1913, 1917, 1922, 1927, 1932, + 1937, 1941, 1945, 1950, 1955, 1960, 1965, 1969, + 1973, 1979, 1984, 1989, 1994, 1998, 2002, 2007, + 2012, 2017, 2021, 2025, 2030, 2035, 2040, 2045, + 2049, 2053, 2058, 2063, 2068, 2073, 2077, 2081, + 2086, 2091, 2096, 2100, 2104, 2109, 2114, 2119, + 2124, 2129, 2133, 2137, 2141, 2145, 2148, 2150, + 2153, 2158, 2160, 2162, 2164, 2166, 2168, 2170, + 2172, 2174, 2176, 2202, 2205, 2210, 2215, 2226, + 2228, 2234, 2237, 2244, 2246, 2249, 2253, 2261, + 2264, 2267, 2270, 2274, 2318, 2320, 2322, 2325, + 2328, 2330, 2332, 2333, 2339, 2341, 2343, 2345, + 2347, 2351, 2353, 2355, 2360, 2364, 2368, 2370, + 2373, 2375, 2377, 2379, 2382, 2384, 2388, 2412, + 2416, 2422, 2432, 2435, 2437, 2440, 2446, 2450, + 2453, 2456, 2458, 2461, 2472, 2479, 2481, 2487, + 2492, 2501, 2504, 2514, 2519, 2521, 
2523, 2528, + 2530, 2534, 2535, 2543, 2554, 2558, 2594, 2596, + 2598, 2600, 2602, 2604, 2605, 2607, 2609, 2614, + 2618, 2622, 2624, 2626, 2628, 2635, 2638, 2646, + 2648, 2650, 2652, 2654, 2662, 2664, 2666, 2668, + 2670, 2672 +}; + +static const short _regex_indicies[] = { + 0, 1, 3, 4, 5, 6, 7, 8, + 9, 10, 12, 13, 14, 15, 16, 17, + 18, 19, 19, 19, 19, 20, 11, 2, + 22, 23, 23, 23, 23, 21, 24, 25, + 25, 25, 25, 21, 27, 27, 27, 27, + 26, 28, 27, 27, 27, 27, 26, 29, + 26, 29, 30, 30, 30, 30, 26, 31, + 30, 32, 30, 30, 26, 29, 30, 32, + 30, 30, 26, 33, 33, 33, 33, 26, + 28, 33, 33, 33, 33, 26, 34, 35, + 36, 26, 37, 38, 26, 39, 40, 30, + 41, 30, 30, 26, 42, 42, 42, 42, + 26, 40, 42, 42, 42, 42, 26, 40, + 30, 41, 30, 30, 26, 43, 44, 21, + 45, 46, 47, 47, 47, 47, 21, 24, + 48, 48, 48, 48, 21, 49, 50, 21, + 48, 48, 48, 48, 21, 51, 51, 51, + 51, 21, 52, 51, 51, 51, 51, 21, + 23, 23, 23, 23, 21, 47, 47, 47, + 47, 21, 45, 53, 46, 54, 54, 54, + 54, 21, 57, 58, 55, 56, 57, 58, + 59, 56, 57, 58, 56, 61, 62, 55, + 60, 61, 62, 63, 60, 61, 62, 60, + 65, 66, 55, 64, 65, 66, 59, 64, + 65, 66, 64, 69, 68, 70, 67, 69, + 71, 72, 74, 73, 74, 74, 67, 75, + 67, 77, 76, 67, 77, 78, 67, 77, + 67, 74, 80, 79, 74, 74, 67, 74, + 80, 81, 74, 74, 67, 74, 80, 74, + 74, 74, 67, 74, 82, 74, 74, 74, + 67, 84, 84, 84, 84, 83, 85, 84, + 84, 84, 84, 83, 86, 86, 86, 86, + 83, 87, 86, 86, 86, 86, 83, 88, + 88, 88, 88, 83, 88, 89, 88, 88, + 88, 83, 91, 90, 92, 91, 90, 95, + 94, 94, 94, 93, 97, 99, 98, 96, + 101, 100, 96, 102, 100, 96, 104, 103, + 105, 103, 106, 103, 109, 108, 109, 110, + 108, 111, 108, 109, 112, 108, 113, 114, + 108, 115, 108, 117, 116, 118, 119, 116, + 120, 116, 121, 116, 122, 116, 123, 116, + 124, 116, 125, 116, 126, 116, 127, 116, + 128, 116, 129, 116, 130, 116, 131, 116, + 132, 116, 133, 134, 135, 116, 136, 116, + 137, 116, 138, 116, 139, 116, 140, 116, + 141, 116, 142, 116, 143, 116, 144, 116, + 145, 116, 146, 116, 147, 116, 148, 116, + 149, 116, 150, 116, 151, 116, 152, 116, + 153, 
116, 154, 116, 155, 116, 156, 116, + 157, 158, 116, 159, 116, 160, 116, 161, + 116, 162, 116, 163, 116, 164, 165, 116, + 166, 116, 167, 116, 168, 116, 169, 116, + 170, 116, 171, 116, 172, 116, 174, 175, + 173, 176, 173, 177, 173, 178, 173, 179, + 173, 180, 173, 181, 173, 182, 173, 183, + 173, 184, 173, 185, 173, 186, 173, 187, + 173, 188, 173, 189, 173, 190, 173, 191, + 173, 192, 173, 193, 173, 194, 173, 195, + 196, 173, 197, 173, 198, 173, 199, 173, + 200, 173, 201, 173, 202, 173, 204, 203, + 205, 203, 206, 203, 207, 203, 208, 203, + 209, 203, 210, 173, 211, 173, 212, 173, + 213, 173, 214, 173, 215, 173, 216, 173, + 217, 218, 173, 219, 173, 220, 173, 221, + 173, 222, 173, 223, 173, 224, 173, 225, + 173, 226, 173, 227, 173, 228, 229, 116, + 230, 116, 231, 116, 232, 116, 233, 116, + 234, 116, 235, 116, 236, 116, 237, 116, + 238, 116, 239, 116, 240, 116, 241, 116, + 242, 116, 243, 116, 244, 116, 245, 116, + 246, 116, 247, 116, 248, 116, 249, 116, + 250, 116, 251, 116, 252, 116, 253, 116, + 254, 116, 255, 116, 256, 116, 257, 116, + 258, 116, 259, 116, 260, 116, 261, 116, + 262, 116, 263, 116, 264, 116, 265, 116, + 266, 116, 267, 116, 268, 116, 269, 116, + 270, 116, 271, 116, 272, 116, 273, 116, + 274, 116, 275, 116, 276, 116, 277, 116, + 278, 116, 279, 116, 280, 116, 281, 116, + 282, 116, 283, 116, 284, 116, 285, 116, + 286, 287, 116, 288, 116, 289, 116, 290, + 116, 291, 116, 292, 116, 293, 116, 294, + 116, 295, 116, 296, 116, 297, 116, 298, + 116, 300, 299, 301, 299, 302, 299, 303, + 299, 304, 299, 305, 116, 306, 116, 307, + 116, 308, 116, 309, 116, 310, 116, 311, + 116, 312, 116, 313, 116, 314, 116, 315, + 116, 316, 116, 317, 116, 318, 116, 319, + 116, 320, 116, 321, 116, 322, 116, 323, + 116, 324, 116, 325, 116, 326, 116, 327, + 116, 328, 116, 329, 330, 116, 331, 116, + 332, 116, 333, 116, 334, 116, 335, 116, + 336, 116, 337, 116, 338, 116, 339, 116, + 340, 116, 341, 116, 342, 116, 343, 116, + 344, 116, 345, 116, 346, 116, 347, 116, + 348, 116, 349, 116, 350, 351, 
116, 352, + 116, 353, 116, 354, 116, 355, 116, 356, + 116, 357, 116, 358, 116, 359, 116, 360, + 116, 361, 116, 362, 116, 363, 116, 364, + 116, 365, 116, 366, 116, 367, 368, 369, + 370, 116, 371, 116, 372, 116, 373, 116, + 374, 116, 375, 116, 376, 116, 377, 116, + 378, 116, 379, 116, 380, 116, 381, 116, + 382, 116, 383, 116, 384, 116, 385, 116, + 386, 116, 387, 116, 388, 389, 116, 390, + 116, 391, 116, 392, 116, 393, 116, 394, + 116, 395, 116, 396, 116, 397, 116, 398, + 116, 400, 401, 399, 402, 399, 403, 399, + 404, 399, 405, 399, 406, 399, 407, 399, + 408, 409, 410, 399, 411, 399, 412, 399, + 413, 399, 414, 399, 415, 399, 416, 399, + 417, 399, 418, 399, 419, 420, 399, 421, + 399, 422, 399, 423, 399, 424, 399, 425, + 399, 426, 399, 428, 429, 427, 430, 427, + 431, 427, 432, 427, 433, 427, 434, 427, + 435, 427, 436, 427, 437, 427, 438, 427, + 439, 427, 441, 440, 442, 440, 443, 440, + 444, 440, 445, 440, 446, 440, 447, 440, + 448, 440, 449, 440, 450, 427, 451, 427, + 452, 427, 453, 427, 454, 427, 455, 427, + 456, 427, 457, 427, 458, 427, 459, 427, + 460, 427, 461, 427, 463, 462, 464, 462, + 465, 462, 466, 462, 467, 462, 468, 462, + 469, 462, 470, 462, 471, 462, 472, 462, + 473, 116, 474, 116, 475, 116, 476, 477, + 116, 478, 116, 479, 116, 480, 116, 481, + 116, 482, 116, 483, 116, 484, 485, 486, + 487, 116, 488, 116, 489, 116, 490, 116, + 491, 116, 492, 116, 493, 116, 494, 116, + 495, 116, 496, 116, 497, 116, 498, 116, + 499, 116, 500, 116, 501, 116, 502, 116, + 503, 116, 504, 116, 505, 116, 506, 116, + 507, 116, 508, 116, 509, 116, 510, 116, + 511, 116, 512, 116, 513, 116, 514, 116, + 515, 116, 516, 116, 517, 116, 518, 116, + 519, 116, 520, 116, 521, 116, 522, 116, + 523, 116, 525, 526, 524, 527, 524, 528, + 524, 529, 524, 530, 524, 531, 524, 532, + 524, 533, 524, 534, 524, 535, 524, 536, + 524, 537, 524, 538, 524, 539, 116, 540, + 116, 541, 116, 542, 116, 543, 116, 544, + 116, 545, 116, 547, 548, 546, 549, 546, + 550, 546, 551, 546, 552, 546, 553, 546, + 554, 546, 
555, 546, 556, 546, 557, 546, + 558, 546, 559, 546, 560, 546, 561, 546, + 562, 546, 563, 546, 564, 546, 565, 546, + 566, 546, 567, 546, 568, 546, 569, 546, + 570, 546, 571, 546, 572, 546, 573, 546, + 574, 546, 575, 546, 576, 546, 577, 546, + 578, 546, 579, 580, 546, 581, 546, 582, + 546, 583, 546, 584, 546, 585, 546, 586, + 546, 587, 546, 588, 546, 589, 546, 590, + 546, 591, 546, 592, 546, 593, 594, 595, + 116, 596, 597, 116, 598, 116, 599, 116, + 600, 116, 601, 116, 602, 116, 603, 116, + 604, 116, 605, 116, 606, 607, 608, 116, + 609, 116, 610, 116, 611, 116, 612, 116, + 613, 116, 614, 116, 615, 116, 616, 116, + 617, 116, 618, 116, 619, 116, 620, 116, + 621, 116, 622, 116, 623, 624, 116, 625, + 116, 626, 116, 627, 628, 116, 629, 116, + 630, 116, 631, 116, 632, 116, 633, 116, + 634, 116, 635, 116, 636, 116, 637, 116, + 638, 116, 639, 116, 640, 116, 641, 116, + 642, 116, 643, 116, 644, 116, 645, 116, + 646, 116, 647, 116, 648, 116, 650, 649, + 652, 651, 653, 649, 649, 651, 656, 657, + 654, 655, 656, 657, 658, 655, 656, 657, + 655, 660, 661, 654, 662, 663, 664, 665, + 666, 667, 668, 669, 670, 671, 672, 673, + 659, 660, 661, 654, 659, 660, 661, 674, + 659, 660, 661, 659, 660, 661, 654, 675, + 676, 677, 678, 679, 680, 681, 682, 683, + 684, 685, 659, 660, 661, 654, 686, 687, + 659, 660, 661, 654, 688, 689, 659, 660, + 661, 654, 690, 659, 660, 661, 654, 691, + 659, 692, 661, 654, 659, 660, 661, 693, + 659, 660, 661, 654, 694, 659, 660, 661, + 654, 695, 659, 696, 661, 654, 659, 660, + 661, 697, 659, 660, 661, 654, 698, 659, + 660, 661, 654, 699, 659, 660, 661, 654, + 700, 659, 701, 661, 654, 659, 660, 661, + 702, 659, 660, 661, 654, 703, 659, 660, + 661, 654, 704, 659, 660, 661, 654, 705, + 659, 660, 661, 654, 706, 659, 707, 661, + 654, 659, 660, 661, 708, 659, 660, 661, + 654, 709, 659, 660, 661, 654, 710, 659, + 660, 661, 654, 711, 659, 660, 661, 654, + 712, 659, 713, 661, 654, 659, 660, 661, + 714, 659, 660, 661, 654, 715, 659, 660, + 661, 654, 716, 659, 660, 661, 654, 
717, + 659, 660, 661, 654, 718, 659, 719, 661, + 654, 659, 660, 661, 720, 659, 660, 661, + 654, 721, 659, 660, 661, 654, 722, 659, + 660, 661, 654, 723, 659, 660, 661, 654, + 724, 659, 725, 661, 654, 659, 660, 661, + 726, 659, 660, 661, 654, 727, 659, 660, + 661, 654, 728, 659, 660, 661, 654, 729, + 659, 660, 661, 654, 730, 659, 731, 661, + 654, 659, 660, 661, 732, 659, 660, 661, + 654, 733, 734, 659, 660, 661, 654, 735, + 659, 660, 661, 654, 736, 659, 660, 661, + 654, 737, 659, 738, 661, 654, 659, 660, + 661, 739, 659, 660, 661, 654, 740, 659, + 660, 661, 654, 741, 659, 660, 661, 654, + 742, 659, 743, 661, 654, 659, 660, 661, + 744, 659, 660, 661, 654, 745, 659, 660, + 661, 654, 746, 659, 660, 661, 654, 747, + 659, 660, 661, 654, 748, 659, 749, 661, + 654, 659, 660, 661, 750, 659, 660, 661, + 654, 751, 659, 660, 661, 654, 752, 659, + 660, 661, 654, 753, 659, 660, 661, 654, + 754, 659, 755, 661, 654, 659, 660, 661, + 756, 659, 660, 661, 654, 757, 659, 660, + 661, 654, 758, 659, 660, 661, 654, 759, + 659, 760, 661, 654, 659, 660, 661, 761, + 659, 660, 661, 654, 762, 659, 660, 661, + 654, 763, 659, 660, 661, 654, 764, 659, + 660, 661, 654, 765, 659, 660, 661, 654, + 766, 659, 767, 661, 654, 659, 660, 661, + 768, 659, 660, 661, 654, 769, 770, 659, + 660, 661, 654, 771, 772, 659, 660, 661, + 654, 773, 659, 660, 661, 654, 774, 659, + 775, 661, 654, 659, 660, 661, 776, 659, + 660, 661, 654, 777, 659, 660, 661, 654, + 778, 659, 779, 661, 654, 659, 660, 661, + 780, 659, 660, 661, 654, 781, 659, 660, + 661, 654, 782, 659, 660, 661, 654, 783, + 659, 784, 661, 654, 659, 660, 661, 785, + 659, 660, 661, 654, 786, 659, 660, 661, + 654, 787, 659, 660, 661, 654, 788, 659, + 660, 661, 654, 789, 659, 790, 661, 654, + 659, 660, 661, 791, 659, 660, 661, 654, + 792, 659, 660, 661, 654, 793, 659, 660, + 661, 654, 794, 659, 660, 661, 654, 795, + 659, 796, 661, 654, 659, 660, 661, 797, + 659, 660, 661, 654, 798, 659, 660, 661, + 654, 799, 659, 660, 661, 654, 800, 659, + 660, 661, 654, 
801, 659, 802, 661, 654, + 659, 660, 661, 803, 659, 660, 661, 654, + 804, 659, 660, 661, 654, 805, 659, 660, + 661, 654, 806, 659, 660, 661, 654, 807, + 659, 808, 661, 654, 659, 660, 661, 809, + 659, 660, 661, 654, 810, 659, 660, 661, + 654, 811, 659, 660, 661, 654, 812, 659, + 660, 661, 654, 813, 659, 814, 661, 654, + 659, 660, 661, 815, 659, 660, 661, 654, + 816, 817, 659, 660, 661, 654, 818, 659, + 660, 661, 654, 819, 659, 660, 661, 654, + 820, 659, 821, 661, 654, 659, 660, 661, + 822, 659, 660, 661, 654, 823, 659, 660, + 661, 654, 824, 659, 660, 661, 654, 825, + 659, 826, 661, 654, 659, 660, 661, 827, + 659, 660, 661, 654, 828, 659, 660, 661, + 654, 829, 659, 660, 661, 654, 830, 659, + 660, 661, 654, 831, 659, 832, 661, 654, + 659, 660, 661, 833, 659, 660, 661, 654, + 834, 659, 660, 661, 654, 835, 659, 660, + 661, 654, 836, 659, 660, 661, 654, 837, + 659, 838, 661, 654, 659, 660, 661, 839, + 659, 660, 661, 654, 840, 659, 660, 661, + 654, 841, 659, 660, 661, 654, 842, 659, + 843, 661, 654, 659, 660, 661, 844, 659, + 660, 661, 654, 845, 659, 660, 661, 654, + 846, 659, 660, 661, 654, 847, 659, 660, + 661, 654, 848, 659, 660, 661, 654, 849, + 659, 850, 661, 654, 659, 660, 661, 851, + 659, 853, 854, 654, 852, 853, 854, 658, + 852, 853, 854, 852, 856, 855, 857, 856, + 855, 860, 859, 859, 859, 858, 862, 861, + 863, 861, 864, 861, 866, 865, 867, 865, + 868, 865, 870, 869, 871, 869, 872, 869, + 873, 876, 877, 878, 879, 880, 881, 882, + 883, 884, 885, 886, 887, 888, 875, 893, + 875, 876, 875, 875, 889, 890, 891, 892, + 889, 874, 895, 896, 894, 23, 23, 23, + 23, 897, 25, 25, 25, 25, 897, 899, + 30, 902, 903, 904, 30, 900, 901, 30, + 30, 898, 44, 897, 47, 47, 47, 47, + 44, 897, 43, 44, 897, 905, 906, 48, + 48, 48, 48, 897, 907, 897, 49, 50, + 897, 908, 909, 910, 897, 45, 53, 46, + 54, 54, 54, 54, 897, 912, 913, 911, + 915, 916, 914, 918, 919, 917, 56, 60, + 64, 920, 923, 926, 927, 928, 929, 930, + 931, 932, 933, 934, 934, 935, 936, 937, + 938, 934, 939, 940, 941, 942, 943, 
944, + 945, 946, 947, 948, 949, 950, 951, 934, + 952, 953, 954, 955, 956, 957, 934, 958, + 959, 960, 961, 924, 925, 922, 963, 962, + 964, 962, 966, 967, 965, 969, 967, 968, + 967, 970, 973, 972, 975, 68, 977, 71, + 979, 978, 976, 981, 980, 982, 980, 984, + 983, 985, 983, 987, 988, 989, 986, 991, + 990, 994, 993, 999, 996, 997, 998, 995, + 1000, 1001, 1002, 995, 94, 94, 94, 1003, + 98, 1004, 1006, 1007, 1005, 1009, 1008, 1010, + 1008, 1011, 1008, 1013, 1014, 1012, 109, 108, + 109, 1016, 1017, 108, 1019, 1020, 1021, 1022, + 1023, 1024, 1025, 1026, 1027, 1028, 1029, 1030, + 1031, 1032, 1033, 1034, 1035, 1036, 1037, 1038, + 1039, 1040, 1041, 1018, 1043, 1044, 1045, 1042, + 1046, 1047, 1048, 1049, 1050, 1042, 1052, 1053, + 1054, 1055, 1056, 1057, 1058, 1059, 1060, 1051, + 1062, 1063, 1061, 1064, 1042, 1065, 1066, 1042, + 1067, 1068, 1069, 1070, 1071, 1042, 1072, 1073, + 1074, 1042, 1076, 1077, 1075, 1078, 1079, 1042, + 1080, 1042, 1081, 1082, 1042, 1084, 1085, 1086, + 1087, 1088, 1089, 1090, 1091, 1092, 1093, 1083, + 1095, 1096, 1097, 1098, 1099, 1100, 1094, 1102, + 1101, 1104, 1105, 1106, 1107, 1108, 1103, 1109, + 1110, 1111, 1112, 1042, 1114, 1115, 1116, 1117, + 1118, 1119, 1120, 1121, 1113, 1122, 1123, 1042, + 1125, 1126, 1127, 1128, 1129, 1130, 1131, 1132, + 1133, 1124, 1134, 1135, 1136, 1137, 1042, 1138, + 1042, 1139, 1042, 1140, 1141, 1142, 1143, 1042, + 1144, 1042, 1146, 1147, 1148, 1145, 649, 1150, + 1151, 1152, 1153, 1154, 1155, 1156, 1149, 1158, + 1159, 1160, 1161, 1157, 1162, 1163, 1164, 1165, + 1162, 874, 655, 1167, 852, 1166, 1169, 1173, + 1174, 1175, 1176, 1176, 1177, 1178, 1179, 1176, + 1180, 1181, 1182, 1183, 1184, 1185, 1186, 1187, + 1188, 1189, 1176, 1190, 1191, 1192, 1193, 1194, + 1195, 1176, 1196, 1197, 1198, 1170, 1171, 1172, + 1172, 1168, 1200, 1199, 1201, 1199, 1203, 1202, + 1204, 1202, 1207, 1206, 1209, 1211, 1210, 1214, + 1213, 1219, 1216, 1217, 1218, 1215, 1220, 1221, + 1222, 1215, 859, 859, 859, 1223, 1225, 1224, + 1226, 1224, 1227, 1224, 
1229, 1230, 1231, 1228, + 1228, 1228, 874, 1233, 1234, 1232, 1236, 1235, + 1237, 1238, 1239, 1240, 1237, 874, 1242, 1241, + 1244, 1243, 1245, 1243, 1246, 1243, 1248, 1247, + 1249, 1250, 1251, 1252, 1249, 874, 1254, 1253, + 1256, 1255, 1257, 1255, 1258, 1255, 1260, 1259, + 1262, 1261, 0 +}; + +static const short _regex_trans_targs[] = { + 746, 746, 746, 746, 746, 748, 749, 750, + 746, 751, 752, 753, 746, 754, 746, 746, + 755, 756, 757, 758, 746, 746, 746, 3, + 746, 4, 746, 6, 7, 746, 8, 746, + 9, 12, 746, 14, 746, 746, 746, 16, + 746, 18, 17, 746, 19, 746, 746, 20, + 21, 746, 22, 25, 746, 27, 28, 746, + 29, 30, 31, 746, 32, 33, 34, 746, + 35, 36, 37, 746, 38, 746, 772, 40, + 42, 46, 49, 43, 44, 746, 45, 47, + 746, 48, 746, 746, 51, 746, 53, 746, + 55, 746, 746, 57, 746, 746, 58, 746, + 746, 60, 59, 783, 61, 783, 783, 746, + 746, 64, 746, 787, 65, 787, 67, 787, + 69, 787, 70, 787, 790, 790, 73, 76, + 74, 75, 790, 77, 78, 79, 80, 790, + 82, 83, 84, 85, 790, 87, 92, 94, + 88, 89, 90, 91, 790, 93, 790, 95, + 790, 97, 98, 99, 100, 790, 102, 103, + 104, 105, 106, 790, 108, 109, 111, 110, + 790, 112, 113, 790, 115, 120, 116, 117, + 118, 119, 790, 121, 790, 790, 123, 139, + 124, 125, 126, 127, 128, 129, 130, 131, + 132, 133, 134, 135, 136, 137, 138, 790, + 140, 141, 790, 143, 144, 790, 145, 146, + 147, 148, 790, 790, 150, 151, 790, 153, + 154, 790, 156, 157, 158, 159, 160, 161, + 790, 163, 167, 164, 165, 166, 790, 168, + 169, 170, 171, 790, 173, 177, 174, 175, + 176, 790, 178, 179, 180, 181, 182, 183, + 790, 185, 186, 187, 188, 189, 190, 191, + 192, 193, 194, 195, 196, 197, 198, 199, + 200, 201, 790, 203, 204, 205, 206, 207, + 790, 209, 210, 211, 212, 213, 790, 215, + 216, 217, 218, 219, 220, 221, 790, 223, + 224, 225, 790, 227, 228, 790, 230, 235, + 231, 232, 233, 234, 790, 236, 237, 238, + 239, 790, 799, 790, 242, 790, 244, 245, + 790, 247, 248, 249, 790, 251, 252, 253, + 254, 255, 790, 257, 258, 259, 260, 261, + 262, 263, 264, 265, 266, 267, 268, 269, + 790, 271, 277, 
272, 273, 274, 275, 276, + 790, 278, 279, 280, 281, 282, 283, 284, + 285, 286, 287, 288, 289, 290, 291, 295, + 292, 293, 294, 790, 296, 297, 298, 299, + 790, 301, 302, 303, 304, 305, 790, 307, + 310, 314, 319, 308, 309, 790, 311, 312, + 313, 790, 315, 316, 317, 318, 790, 320, + 321, 322, 323, 790, 325, 332, 326, 327, + 328, 329, 330, 331, 790, 333, 790, 790, + 790, 335, 336, 790, 338, 339, 340, 790, + 342, 344, 349, 343, 790, 345, 346, 347, + 348, 790, 790, 351, 354, 352, 353, 790, + 355, 356, 790, 790, 358, 364, 359, 360, + 361, 362, 363, 790, 365, 366, 367, 790, + 790, 369, 370, 371, 372, 373, 374, 375, + 376, 790, 378, 379, 380, 381, 382, 383, + 790, 385, 386, 387, 388, 790, 790, 390, + 391, 392, 393, 394, 395, 396, 397, 790, + 790, 400, 401, 790, 403, 408, 404, 405, + 406, 407, 790, 409, 410, 415, 421, 433, + 411, 412, 413, 414, 790, 416, 417, 418, + 419, 420, 790, 422, 423, 424, 425, 426, + 427, 428, 429, 430, 431, 432, 790, 434, + 435, 436, 437, 790, 439, 440, 790, 442, + 443, 444, 445, 790, 790, 447, 452, 448, + 449, 450, 451, 790, 453, 454, 455, 456, + 457, 458, 790, 460, 461, 462, 790, 464, + 465, 790, 790, 467, 473, 468, 469, 470, + 471, 472, 790, 474, 475, 476, 477, 478, + 479, 790, 481, 482, 483, 484, 790, 486, + 487, 488, 489, 790, 491, 492, 493, 494, + 495, 496, 790, 498, 507, 499, 500, 501, + 502, 503, 504, 505, 506, 790, 508, 509, + 790, 511, 519, 528, 512, 515, 513, 514, + 790, 516, 517, 518, 790, 520, 521, 522, + 525, 790, 523, 524, 790, 526, 527, 790, + 529, 790, 531, 532, 533, 790, 535, 536, + 790, 537, 790, 539, 543, 540, 541, 542, + 790, 544, 545, 546, 547, 790, 549, 550, + 551, 552, 553, 790, 790, 790, 790, 790, + 790, 0, 560, 561, 562, 817, 819, 563, + 564, 565, 819, 567, 568, 569, 570, 651, + 666, 672, 678, 684, 690, 696, 707, 713, + 719, 724, 819, 571, 586, 592, 598, 604, + 610, 616, 627, 633, 639, 644, 572, 581, + 573, 577, 574, 575, 576, 819, 578, 579, + 580, 819, 582, 583, 584, 585, 819, 587, + 588, 589, 590, 591, 819, 593, 594, 595, + 
596, 597, 819, 599, 600, 601, 602, 603, + 819, 605, 606, 607, 608, 609, 819, 611, + 612, 613, 614, 615, 819, 617, 622, 618, + 619, 620, 621, 819, 623, 624, 625, 626, + 819, 628, 629, 630, 631, 632, 819, 634, + 635, 636, 637, 638, 819, 640, 641, 642, + 643, 819, 645, 646, 647, 648, 649, 650, + 819, 652, 661, 653, 657, 654, 655, 656, + 819, 658, 659, 660, 819, 662, 663, 664, + 665, 819, 667, 668, 669, 670, 671, 819, + 673, 674, 675, 676, 677, 819, 679, 680, + 681, 682, 683, 819, 685, 686, 687, 688, + 689, 819, 691, 692, 693, 694, 695, 819, + 697, 702, 698, 699, 700, 701, 819, 703, + 704, 705, 706, 819, 708, 709, 710, 711, + 712, 819, 714, 715, 716, 717, 718, 819, + 720, 721, 722, 723, 819, 725, 726, 727, + 728, 729, 730, 819, 731, 732, 733, 819, + 735, 819, 819, 736, 819, 819, 819, 739, + 819, 838, 838, 742, 838, 843, 843, 745, + 843, 746, 0, 746, 746, 746, 747, 746, + 759, 760, 746, 761, 762, 763, 746, 782, + 746, 746, 784, 785, 786, 746, 746, 1, + 2, 746, 746, 5, 9, 10, 11, 13, + 15, 746, 746, 746, 23, 24, 26, 746, + 746, 746, 746, 746, 746, 746, 746, 746, + 746, 746, 746, 764, 766, 768, 746, 746, + 746, 746, 746, 746, 746, 746, 746, 769, + 746, 746, 746, 746, 746, 746, 746, 746, + 746, 770, 746, 746, 746, 771, 746, 776, + 746, 777, 778, 746, 746, 746, 746, 746, + 779, 746, 746, 765, 746, 746, 767, 768, + 746, 768, 746, 746, 746, 746, 746, 746, + 746, 39, 774, 41, 746, 773, 746, 746, + 775, 746, 746, 50, 52, 54, 746, 56, + 746, 746, 746, 746, 780, 780, 780, 781, + 746, 746, 746, 746, 746, 746, 746, 746, + 746, 746, 62, 63, 788, 787, 789, 787, + 66, 68, 790, 791, 792, 793, 795, 796, + 797, 798, 800, 801, 802, 803, 804, 806, + 807, 808, 809, 810, 811, 812, 813, 814, + 815, 816, 790, 71, 72, 81, 86, 96, + 101, 107, 114, 790, 122, 790, 790, 142, + 790, 794, 790, 155, 162, 790, 149, 152, + 172, 184, 202, 208, 214, 222, 226, 229, + 240, 246, 250, 790, 241, 243, 256, 270, + 300, 306, 324, 790, 790, 334, 337, 341, + 790, 790, 790, 790, 790, 350, 790, 357, + 790, 805, 790, 
377, 384, 790, 368, 790, + 790, 389, 398, 790, 790, 399, 402, 438, + 441, 790, 790, 790, 790, 790, 446, 790, + 790, 790, 459, 463, 790, 466, 790, 480, + 485, 790, 790, 790, 490, 497, 510, 530, + 534, 538, 548, 554, 555, 556, 557, 558, + 790, 790, 790, 790, 790, 818, 818, 818, + 818, 818, 818, 818, 818, 819, 819, 820, + 821, 819, 819, 833, 834, 835, 819, 566, + 819, 822, 824, 819, 819, 819, 819, 819, + 819, 826, 819, 819, 819, 819, 819, 819, + 827, 819, 819, 819, 819, 819, 819, 828, + 829, 819, 819, 819, 819, 819, 830, 819, + 823, 819, 819, 825, 819, 819, 819, 819, + 819, 819, 819, 734, 819, 819, 819, 819, + 831, 831, 831, 832, 819, 819, 819, 819, + 819, 819, 737, 738, 836, 837, 836, 836, + 836, 836, 836, 838, 839, 838, 840, 841, + 842, 838, 838, 838, 838, 740, 741, 843, + 844, 843, 845, 846, 847, 843, 843, 843, + 843, 743, 744, 848, 848, 849, 849 +}; + +static const short _regex_trans_actions[] = { + 827, 631, 765, 731, 723, 45, 903, 903, + 897, 45, 912, 45, 900, 903, 729, 741, + 0, 45, 45, 923, 737, 841, 747, 0, + 743, 3, 839, 3, 0, 761, 3, 759, + 870, 3, 751, 0, 749, 755, 753, 0, + 757, 3, 0, 745, 0, 725, 727, 27, + 3, 763, 0, 3, 673, 0, 25, 829, + 0, 0, 0, 603, 0, 0, 0, 601, + 0, 0, 0, 831, 0, 675, 17, 0, + 7, 870, 3, 17, 17, 663, 17, 870, + 661, 870, 665, 837, 3, 671, 3, 669, + 3, 667, 833, 0, 677, 835, 0, 679, + 845, 0, 11, 29, 13, 31, 0, 843, + 769, 0, 771, 59, 0, 53, 0, 51, + 0, 49, 0, 47, 359, 315, 0, 0, + 0, 0, 127, 0, 0, 0, 0, 129, + 0, 0, 0, 0, 131, 0, 0, 0, + 0, 0, 0, 0, 133, 0, 135, 0, + 137, 0, 0, 0, 0, 139, 0, 0, + 0, 0, 0, 141, 0, 0, 0, 0, + 143, 0, 0, 145, 0, 0, 0, 0, + 0, 0, 147, 0, 149, 341, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 151, + 0, 0, 153, 0, 0, 155, 0, 0, + 0, 0, 157, 343, 0, 0, 159, 0, + 0, 161, 0, 0, 0, 0, 0, 0, + 163, 0, 0, 0, 0, 0, 165, 0, + 0, 0, 0, 167, 0, 0, 0, 0, + 0, 169, 0, 0, 0, 0, 0, 0, + 171, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 173, 0, 0, 0, 0, 0, + 175, 0, 0, 0, 0, 0, 177, 0, + 0, 0, 0, 0, 
0, 0, 179, 0, + 0, 0, 181, 0, 0, 183, 0, 0, + 0, 0, 0, 0, 185, 0, 0, 0, + 0, 187, 45, 357, 0, 189, 0, 0, + 191, 0, 0, 0, 193, 0, 0, 0, + 0, 0, 195, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 197, 0, 0, 0, 0, 0, 0, 0, + 199, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 201, 0, 0, 0, 0, + 203, 0, 0, 0, 0, 0, 205, 0, + 0, 0, 0, 0, 0, 207, 0, 0, + 0, 209, 0, 0, 0, 0, 211, 0, + 0, 0, 0, 213, 0, 0, 0, 0, + 0, 0, 0, 0, 215, 0, 217, 345, + 219, 0, 0, 221, 0, 0, 0, 223, + 0, 0, 0, 0, 225, 0, 0, 0, + 0, 227, 229, 0, 0, 0, 0, 231, + 0, 0, 233, 347, 0, 0, 0, 0, + 0, 0, 0, 235, 0, 0, 0, 237, + 349, 0, 0, 0, 0, 0, 0, 0, + 0, 239, 0, 0, 0, 0, 0, 0, + 241, 0, 0, 0, 0, 243, 351, 0, + 0, 0, 0, 0, 0, 0, 0, 245, + 247, 0, 0, 249, 0, 0, 0, 0, + 0, 0, 251, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 253, 0, 0, 0, + 0, 0, 255, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 257, 0, + 0, 0, 0, 259, 0, 0, 261, 0, + 0, 0, 0, 263, 353, 0, 0, 0, + 0, 0, 0, 265, 0, 0, 0, 0, + 0, 0, 267, 0, 0, 0, 269, 0, + 0, 271, 355, 0, 0, 0, 0, 0, + 0, 0, 273, 0, 0, 0, 0, 0, + 0, 275, 0, 0, 0, 0, 277, 0, + 0, 0, 0, 279, 0, 0, 0, 0, + 0, 0, 281, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 283, 0, 0, + 285, 0, 0, 0, 0, 0, 0, 0, + 287, 0, 0, 0, 289, 0, 0, 0, + 0, 291, 0, 0, 293, 0, 0, 295, + 0, 297, 0, 0, 0, 299, 0, 0, + 303, 0, 301, 0, 0, 0, 0, 0, + 305, 0, 0, 0, 0, 307, 0, 0, + 0, 0, 0, 309, 311, 119, 121, 123, + 125, 39, 0, 35, 33, 37, 539, 0, + 0, 0, 377, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 435, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 381, 0, 0, + 0, 385, 0, 0, 0, 0, 389, 0, + 0, 0, 0, 0, 393, 0, 0, 0, + 0, 0, 397, 0, 0, 0, 0, 0, + 401, 0, 0, 0, 0, 0, 405, 0, + 0, 0, 0, 0, 409, 0, 0, 0, + 0, 0, 0, 413, 0, 0, 0, 0, + 417, 0, 0, 0, 0, 0, 421, 0, + 0, 0, 0, 0, 425, 0, 0, 0, + 0, 429, 0, 0, 0, 0, 0, 0, + 433, 0, 0, 0, 0, 0, 0, 0, + 379, 0, 0, 0, 383, 0, 0, 0, + 0, 387, 0, 0, 0, 0, 0, 391, + 0, 0, 0, 0, 0, 395, 0, 0, + 0, 0, 0, 399, 0, 0, 0, 0, + 0, 403, 0, 0, 0, 0, 0, 407, + 0, 0, 0, 0, 0, 0, 411, 0, + 0, 
0, 0, 415, 0, 0, 0, 0, + 0, 419, 0, 0, 0, 0, 0, 423, + 0, 0, 0, 0, 427, 0, 0, 0, + 0, 0, 0, 431, 0, 0, 0, 533, + 0, 471, 535, 0, 475, 537, 503, 0, + 505, 569, 557, 0, 559, 587, 575, 0, + 577, 633, 0, 777, 775, 637, 45, 597, + 0, 0, 609, 0, 45, 0, 635, 909, + 599, 773, 0, 45, 45, 629, 779, 0, + 0, 821, 819, 1, 855, 855, 1, 0, + 3, 735, 733, 739, 1, 1, 0, 783, + 615, 613, 785, 619, 617, 787, 623, 621, + 781, 817, 721, 5, 852, 915, 639, 647, + 611, 695, 607, 717, 699, 715, 683, 0, + 605, 713, 691, 703, 687, 719, 641, 657, + 645, 0, 693, 659, 655, 906, 697, 45, + 651, 45, 0, 653, 689, 649, 701, 685, + 7, 643, 791, 15, 867, 795, 858, 919, + 793, 927, 847, 811, 711, 709, 809, 681, + 801, 7, 17, 849, 799, 17, 876, 797, + 17, 873, 815, 1, 1, 1, 803, 0, + 813, 707, 705, 805, 19, 23, 21, 45, + 882, 894, 888, 807, 825, 789, 627, 625, + 823, 767, 0, 0, 45, 55, 45, 57, + 0, 0, 317, 45, 45, 45, 45, 45, + 45, 45, 45, 45, 45, 45, 45, 45, + 45, 45, 45, 45, 45, 45, 45, 45, + 0, 0, 339, 0, 0, 0, 0, 0, + 0, 0, 0, 319, 0, 61, 63, 0, + 65, 45, 67, 0, 0, 321, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 337, 0, 0, 0, 0, + 0, 0, 0, 323, 79, 0, 0, 0, + 69, 71, 73, 75, 77, 0, 325, 0, + 81, 45, 83, 0, 0, 327, 0, 329, + 85, 0, 0, 87, 89, 0, 0, 0, + 0, 331, 91, 93, 95, 97, 0, 99, + 101, 103, 0, 0, 333, 0, 105, 0, + 0, 107, 109, 111, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 313, 335, 113, 115, 117, 375, 361, 363, + 365, 367, 369, 371, 373, 509, 491, 45, + 0, 511, 507, 0, 45, 45, 531, 0, + 499, 5, 9, 473, 497, 489, 439, 457, + 493, 0, 437, 485, 461, 481, 451, 441, + 0, 487, 453, 449, 495, 455, 445, 45, + 0, 447, 483, 443, 459, 479, 7, 517, + 15, 861, 519, 15, 864, 513, 469, 467, + 527, 477, 521, 0, 515, 465, 463, 523, + 19, 23, 21, 45, 879, 891, 885, 525, + 529, 501, 0, 0, 549, 0, 543, 541, + 551, 547, 545, 563, 0, 561, 0, 45, + 45, 567, 553, 565, 555, 0, 0, 581, + 0, 579, 0, 45, 45, 585, 571, 583, + 573, 0, 0, 591, 589, 595, 593 +}; + +static const short _regex_to_state_actions[] = { + 0, 0, 0, 0, 
0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 41, + 0, 41, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 
0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 41, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 41, 0, 0, 41, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 41, 41, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 41, 0, 41, 0, + 0, 0, 0, 41, 0, 0, 0, 0, + 41, 41 +}; + +static const short _regex_from_state_actions[] = { + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 
0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 43, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 43, 0, 0, 43, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 43, 43, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 43, 0, 43, 0, + 0, 0, 0, 43, 0, 0, 0, 0, + 43, 43 +}; + +static const short _regex_eof_actions[] = { + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 
0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 39, + 39, 39, 39, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 
0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0 +}; + +static const short _regex_eof_trans[] = { + 0, 1, 1, 22, 22, 27, 27, 27, + 27, 27, 27, 27, 27, 27, 27, 27, + 27, 27, 27, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 68, 68, + 68, 68, 68, 68, 68, 68, 68, 68, + 68, 68, 84, 84, 84, 84, 84, 84, + 91, 91, 94, 97, 97, 97, 104, 104, + 104, 108, 108, 108, 108, 108, 108, 117, + 117, 117, 117, 117, 117, 117, 117, 117, + 117, 117, 117, 117, 117, 117, 117, 117, + 117, 117, 117, 117, 117, 117, 117, 117, + 117, 117, 117, 117, 117, 117, 117, 117, + 117, 117, 117, 117, 117, 117, 117, 117, + 117, 117, 117, 117, 117, 117, 117, 117, + 117, 117, 174, 174, 174, 174, 174, 174, + 174, 174, 174, 174, 174, 174, 174, 174, + 174, 174, 174, 174, 174, 174, 174, 174, + 174, 174, 174, 174, 174, 204, 204, 204, + 204, 204, 204, 174, 174, 174, 174, 174, + 174, 174, 174, 174, 174, 174, 174, 174, + 174, 174, 174, 174, 117, 117, 117, 117, + 117, 117, 117, 117, 117, 117, 117, 117, + 117, 117, 117, 117, 117, 117, 117, 117, + 117, 117, 117, 117, 117, 117, 117, 117, + 117, 117, 117, 117, 117, 117, 117, 117, + 117, 117, 117, 117, 117, 117, 117, 117, + 117, 117, 117, 117, 117, 117, 117, 117, + 117, 117, 117, 117, 117, 117, 117, 117, + 117, 117, 117, 117, 117, 117, 117, 117, + 117, 300, 300, 300, 300, 300, 117, 117, + 117, 117, 117, 117, 117, 117, 117, 117, + 117, 117, 117, 117, 117, 117, 117, 117, + 117, 117, 117, 117, 117, 117, 117, 117, + 117, 117, 117, 117, 117, 117, 117, 117, + 117, 117, 117, 117, 117, 117, 117, 117, + 117, 117, 117, 117, 117, 117, 117, 117, + 117, 117, 117, 117, 117, 117, 117, 117, + 
117, 117, 117, 117, 117, 117, 117, 117, + 117, 117, 117, 117, 117, 117, 117, 117, + 117, 117, 117, 117, 117, 117, 117, 117, + 117, 117, 117, 117, 117, 117, 400, 400, + 400, 400, 400, 400, 400, 400, 400, 400, + 400, 400, 400, 400, 400, 400, 400, 400, + 400, 400, 400, 400, 400, 428, 428, 428, + 428, 428, 428, 428, 428, 428, 428, 428, + 441, 441, 441, 441, 441, 441, 441, 441, + 441, 428, 428, 428, 428, 428, 428, 428, + 428, 428, 428, 428, 428, 463, 463, 463, + 463, 463, 463, 463, 463, 463, 463, 117, + 117, 117, 117, 117, 117, 117, 117, 117, + 117, 117, 117, 117, 117, 117, 117, 117, + 117, 117, 117, 117, 117, 117, 117, 117, + 117, 117, 117, 117, 117, 117, 117, 117, + 117, 117, 117, 117, 117, 117, 117, 117, + 117, 117, 117, 117, 117, 117, 525, 525, + 525, 525, 525, 525, 525, 525, 525, 525, + 525, 525, 525, 117, 117, 117, 117, 117, + 117, 117, 547, 547, 547, 547, 547, 547, + 547, 547, 547, 547, 547, 547, 547, 547, + 547, 547, 547, 547, 547, 547, 547, 547, + 547, 547, 547, 547, 547, 547, 547, 547, + 547, 547, 547, 547, 547, 547, 547, 547, + 547, 547, 547, 547, 547, 547, 117, 117, + 117, 117, 117, 117, 117, 117, 117, 117, + 117, 117, 117, 117, 117, 117, 117, 117, + 117, 117, 117, 117, 117, 117, 117, 117, + 117, 117, 117, 117, 117, 117, 117, 117, + 117, 117, 117, 117, 117, 117, 117, 117, + 117, 117, 117, 117, 117, 117, 117, 0, + 0, 0, 0, 655, 655, 655, 655, 655, + 655, 655, 655, 655, 655, 655, 655, 655, + 655, 655, 655, 655, 655, 655, 655, 655, + 655, 655, 655, 655, 655, 655, 655, 655, + 655, 655, 655, 655, 655, 655, 655, 655, + 655, 655, 655, 655, 655, 655, 655, 655, + 655, 655, 655, 655, 655, 655, 655, 655, + 655, 655, 655, 655, 655, 655, 655, 655, + 655, 655, 655, 655, 655, 655, 655, 655, + 655, 655, 655, 655, 655, 655, 655, 655, + 655, 655, 655, 655, 655, 655, 655, 655, + 655, 655, 655, 655, 655, 655, 655, 655, + 655, 655, 655, 655, 655, 655, 655, 655, + 655, 655, 655, 655, 655, 655, 655, 655, + 655, 655, 655, 655, 655, 655, 655, 655, + 655, 655, 655, 655, 655, 655, 
655, 655, + 655, 655, 655, 655, 655, 655, 655, 655, + 655, 655, 655, 655, 655, 655, 655, 655, + 655, 655, 655, 655, 655, 655, 655, 655, + 655, 655, 655, 655, 655, 655, 655, 655, + 655, 655, 655, 655, 655, 655, 655, 655, + 655, 655, 655, 655, 655, 655, 856, 856, + 859, 862, 862, 862, 866, 866, 866, 870, + 870, 870, 0, 895, 898, 898, 899, 898, + 898, 898, 898, 898, 898, 898, 898, 912, + 915, 918, 921, 922, 963, 963, 966, 969, + 971, 972, 975, 977, 981, 981, 984, 984, + 987, 991, 993, 996, 996, 1004, 1005, 1006, + 1009, 1009, 1009, 0, 1016, 1016, 0, 1043, + 1043, 1052, 1062, 1043, 1043, 1043, 1043, 1076, + 1043, 1043, 1043, 1084, 1095, 1102, 1104, 1043, + 1114, 1043, 1125, 1043, 1043, 1043, 1043, 1043, + 1146, 0, 0, 0, 1167, 1167, 1200, 1200, + 1203, 1203, 1206, 1209, 1211, 1213, 1216, 1216, + 1224, 1225, 1225, 1225, 0, 1233, 0, 1242, + 1244, 1244, 1244, 0, 1254, 1256, 1256, 1256, + 0, 0 +}; + +static const int regex_start = 746; +static const int regex_error = 0; + +static const int regex_en_readVerb = 787; +static const int regex_en_readUCP = 790; +static const int regex_en_readBracedUCP = 559; +static const int regex_en_readUCPSingle = 818; +static const int regex_en_charClassGuts = 819; +static const int regex_en_readClass = 836; +static const int regex_en_readQuotedLiteral = 838; +static const int regex_en_readQuotedClass = 843; +static const int regex_en_readComment = 848; +static const int regex_en_readNewlineTerminatedComment = 849; +static const int regex_en_main = 746; + + +#line 1912 "Parser.rl" + +/** \brief Main parser call, returns root Component or nullptr. */ +unique_ptr parse(const char *ptr, ParseMode &globalMode) { + assert(ptr); + + const char *p = ptr; + const char *pe = ptr + strlen(ptr); + + // First, read the control verbs, set any global mode flags and move the + // ptr forward. 
+ p = read_control_verbs(p, pe, 0, globalMode); + + const char *eof = pe; + int cs; + UNUSED int act; + int top; + vector stack; + const char *ts, *te; + unichar accumulator = 0; + unichar octAccumulator = 0; /* required as we are also accumulating for + * back ref when looking for octals */ + unsigned repeatN = 0; + unsigned repeatM = 0; + string label; + + ParseMode mode = globalMode; + ParseMode newMode; + + bool negated = false; + bool inComment = false; + + // Stack of sequences and flags used to store state when we enter + // sub-sequences. + vector sequences; + + // Index of the next capturing group. Note that zero is reserved for the + // root sequence. + unsigned groupIndex = 1; + + // Set storing group names that are currently in use. + flat_set groupNames; + + // Root sequence. + unique_ptr rootSeq = std::make_unique(); + rootSeq->setCaptureIndex(0); + + // Current sequence being appended to + ComponentSequence *currentSeq = rootSeq.get(); + + // The current character class being appended to. This is used as the + // accumulator for both character class and UCP properties. + unique_ptr currentCls; + + // True if the machine is currently inside a character class, i.e. square + // brackets [..]. + bool inCharClass = false; + + // True if the machine is inside a character class but it has not processed + // any "real" elements yet, i.e. it's still processing meta-characters like + // '^'. + bool inCharClassEarly = false; + + // Location at which the current character class began. + const char *currentClsBegin = p; + + // We throw exceptions on various parsing failures beyond this point: we + // use a try/catch block here to clean up our allocated memory before we + // re-throw the exception to the caller. 
+ try { + // Embed the Ragel machine here + +#line 2533 "Parser.cpp" + { + cs = regex_start; + top = 0; + ts = 0; + te = 0; + act = 0; + } + +#line 1983 "Parser.rl" + +#line 2544 "Parser.cpp" + { + int _klen; + unsigned int _trans; + short _widec; + const short *_acts; + unsigned int _nacts; + const short *_keys; + + if ( p == pe ) + goto _test_eof; + if ( cs == 0 ) + goto _out; +_resume: + _acts = _regex_actions + _regex_from_state_actions[cs]; + _nacts = (unsigned int) *_acts++; + while ( _nacts-- > 0 ) { + switch ( *_acts++ ) { + case 24: +#line 1 "NONE" + {ts = p;} + break; +#line 2566 "Parser.cpp" + } + } + + _widec = (*p); + _klen = _regex_cond_lengths[cs]; + _keys = _regex_cond_keys + (_regex_cond_offsets[cs]*2); + if ( _klen > 0 ) { + const short *_lower = _keys; + const short *_mid; + const short *_upper = _keys + (_klen<<1) - 2; + while (1) { + if ( _upper < _lower ) + break; + + _mid = _lower + (((_upper-_lower) >> 1) & ~1); + if ( _widec < _mid[0] ) + _upper = _mid - 2; + else if ( _widec > _mid[1] ) + _lower = _mid + 2; + else { + switch ( _regex_cond_spaces[_regex_cond_offsets[cs] + ((_mid - _keys)>>1)] ) { + case 0: { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 475 "Parser.rl" + mode.utf8 ) _widec += 256; + break; + } + case 1: { + _widec = (short)(1152 + ((*p) - -128)); + if ( +#line 476 "Parser.rl" + mode.ignore_space ) _widec += 256; + break; + } + case 2: { + _widec = (short)(640 + ((*p) - -128)); + if ( +#line 477 "Parser.rl" + inCharClassEarly ) _widec += 256; + break; + } + } + break; + } + } + } + + _keys = _regex_trans_keys + _regex_key_offsets[cs]; + _trans = _regex_index_offsets[cs]; + + _klen = _regex_single_lengths[cs]; + if ( _klen > 0 ) { + const short *_lower = _keys; + const short *_mid; + const short *_upper = _keys + _klen - 1; + while (1) { + if ( _upper < _lower ) + break; + + _mid = _lower + ((_upper-_lower) >> 1); + if ( _widec < *_mid ) + _upper = _mid - 1; + else if ( _widec > *_mid ) + _lower = _mid + 1; + else { 
+ _trans += (unsigned int)(_mid - _keys); + goto _match; + } + } + _keys += _klen; + _trans += _klen; + } + + _klen = _regex_range_lengths[cs]; + if ( _klen > 0 ) { + const short *_lower = _keys; + const short *_mid; + const short *_upper = _keys + (_klen<<1) - 2; + while (1) { + if ( _upper < _lower ) + break; + + _mid = _lower + (((_upper-_lower) >> 1) & ~1); + if ( _widec < _mid[0] ) + _upper = _mid - 2; + else if ( _widec > _mid[1] ) + _lower = _mid + 2; + else { + _trans += (unsigned int)((_mid - _keys)>>1); + goto _match; + } + } + _trans += _klen; + } + +_match: + _trans = _regex_indicies[_trans]; +_eof_trans: + cs = _regex_trans_targs[_trans]; + + if ( _regex_trans_actions[_trans] == 0 ) + goto _again; + + _acts = _regex_actions + _regex_trans_actions[_trans]; + _nacts = (unsigned int) *_acts++; + while ( _nacts-- > 0 ) + { + switch ( *_acts++ ) + { + case 0: +#line 285 "Parser.rl" + { label.clear();} + break; + case 1: +#line 286 "Parser.rl" + { label.push_back((*p));} + break; + case 2: +#line 287 "Parser.rl" + { octAccumulator = 0;} + break; + case 3: +#line 288 "Parser.rl" + { accumulator = 0;} + break; + case 4: +#line 289 "Parser.rl" + { + octAccumulator = 0; + pushOct(&octAccumulator, (*p)); + } + break; + case 5: +#line 293 "Parser.rl" + { + accumulator = 0; + pushDec(&accumulator, (*p)); + } + break; + case 6: +#line 297 "Parser.rl" + { repeatN = 0; repeatM = 0; } + break; + case 7: +#line 298 "Parser.rl" + { pushDec(&repeatN, (*p)); } + break; + case 8: +#line 299 "Parser.rl" + { pushDec(&repeatM, (*p)); } + break; + case 9: +#line 300 "Parser.rl" + { pushOct(&octAccumulator, (*p)); } + break; + case 10: +#line 301 "Parser.rl" + { pushDec(&accumulator, (*p)); } + break; + case 11: +#line 302 "Parser.rl" + { + accumulator *= 16; + accumulator += (*p) - '0'; + } + break; + case 12: +#line 306 "Parser.rl" + { + accumulator *= 16; + accumulator += 10 + (*p) - 'a'; + } + break; + case 13: +#line 310 "Parser.rl" + { + accumulator *= 16; + accumulator += 
10 + (*p) - 'A'; + } + break; + case 14: +#line 430 "Parser.rl" + { + newMode = mode; + } + break; + case 15: +#line 437 "Parser.rl" + { + switch ((*p)) { + case 'i': + newMode.caseless = true; + break; + case 'm': + newMode.multiline = true; + break; + case 's': + newMode.dotall = true; + break; + case 'x': + newMode.ignore_space = true; + break; + default: + assert(0); // this action only called for [imsx] + break; + } + } + break; + case 16: +#line 456 "Parser.rl" + { + switch ((*p)) { + case 'i': + newMode.caseless = false; + break; + case 'm': + newMode.multiline = false; + break; + case 's': + newMode.dotall = false; + break; + case 'x': + newMode.ignore_space = false; + break; + default: + assert(0); // this action only called for [imsx] + break; + } + } + break; + case 17: +#line 510 "Parser.rl" + {repeatM = repeatN;} + break; + case 18: +#line 510 "Parser.rl" + {repeatM = ComponentRepeat::NoLimit;} + break; + case 19: +#line 722 "Parser.rl" + { negated = !negated; } + break; + case 20: +#line 723 "Parser.rl" + { p--; { + DEBUG_PRINTF("stack %zu top %d\n", stack.size(), top); + if ((int)stack.size() == top) { + stack.resize(2 * (top + 1)); + } + {stack[top++] = cs; cs = 790;goto _again;}} } + break; + case 21: +#line 724 "Parser.rl" + { if (!inCharClass) { // not inside [..] 
+ currentCls->finalize(); + currentSeq->addComponent(move(currentCls)); + } + {cs = stack[--top]; goto _again;} + } + break; + case 22: +#line 730 "Parser.rl" + { throw LocatedParseError("Malformed property"); } + break; + case 25: +#line 1 "NONE" + {te = p+1;} + break; + case 26: +#line 550 "Parser.rl" + {te = p+1;{ + throw LocatedParseError("(*UTF8) must be at start of " + "expression, encountered"); + }} + break; + case 27: +#line 554 "Parser.rl" + {te = p+1;{ + throw LocatedParseError("(*UTF) must be at start of " + "expression, encountered"); + }} + break; + case 28: +#line 558 "Parser.rl" + {te = p+1;{ + throw LocatedParseError("(*UCP) must be at start of " + "expression, encountered"); + }} + break; + case 29: +#line 564 "Parser.rl" + {te = p+1;{ + ParseMode temp_mode; + assert(ts - 2 >= ptr); // parser needs the '(*' at the start too. + read_control_verbs(ts - 2, te, (ts - 2 - ptr), temp_mode); + assert(0); // Should have thrown a parse error. + throw LocatedParseError("Unknown control verb"); + }} + break; + case 30: +#line 571 "Parser.rl" + {te = p+1;{ + throw LocatedParseError("Unknown control verb"); + }} + break; + case 31: +#line 571 "Parser.rl" + {te = p;p--;{ + throw LocatedParseError("Unknown control verb"); + }} + break; + case 32: +#line 571 "Parser.rl" + {{p = ((te))-1;}{ + throw LocatedParseError("Unknown control verb"); + }} + break; + case 33: +#line 581 "Parser.rl" + {te = p+1;{ currentCls->add(CLASS_UCP_CC, negated); {cs = stack[--top]; goto _again;} }} + break; + case 34: +#line 582 "Parser.rl" + {te = p+1;{ currentCls->add(CLASS_UCP_CF, negated); {cs = stack[--top]; goto _again;} }} + break; + case 35: +#line 583 "Parser.rl" + {te = p+1;{ currentCls->add(CLASS_UCP_CN, negated); {cs = stack[--top]; goto _again;} }} + break; + case 36: +#line 585 "Parser.rl" + {te = p+1;{ currentCls->add(CLASS_UCP_CS, negated); {cs = stack[--top]; goto _again;} }} + break; + case 37: +#line 587 "Parser.rl" + {te = p+1;{ currentCls->add(CLASS_UCP_LL, 
negated); {cs = stack[--top]; goto _again;} }} + break; + case 38: +#line 588 "Parser.rl" + {te = p+1;{ currentCls->add(CLASS_UCP_LM, negated); {cs = stack[--top]; goto _again;} }} + break; + case 39: +#line 589 "Parser.rl" + {te = p+1;{ currentCls->add(CLASS_UCP_LO, negated); {cs = stack[--top]; goto _again;} }} + break; + case 40: +#line 590 "Parser.rl" + {te = p+1;{ currentCls->add(CLASS_UCP_LT, negated); {cs = stack[--top]; goto _again;} }} + break; + case 41: +#line 591 "Parser.rl" + {te = p+1;{ currentCls->add(CLASS_UCP_LU, negated); {cs = stack[--top]; goto _again;} }} + break; + case 42: +#line 592 "Parser.rl" + {te = p+1;{ currentCls->add(CLASS_UCP_L_AND, negated); {cs = stack[--top]; goto _again;} }} + break; + case 43: +#line 594 "Parser.rl" + {te = p+1;{ currentCls->add(CLASS_UCP_MC, negated); {cs = stack[--top]; goto _again;} }} + break; + case 44: +#line 596 "Parser.rl" + {te = p+1;{ currentCls->add(CLASS_UCP_MN, negated); {cs = stack[--top]; goto _again;} }} + break; + case 45: +#line 598 "Parser.rl" + {te = p+1;{ currentCls->add(CLASS_UCP_ND, negated); {cs = stack[--top]; goto _again;} }} + break; + case 46: +#line 599 "Parser.rl" + {te = p+1;{ currentCls->add(CLASS_UCP_NL, negated); {cs = stack[--top]; goto _again;} }} + break; + case 47: +#line 600 "Parser.rl" + {te = p+1;{ currentCls->add(CLASS_UCP_NO, negated); {cs = stack[--top]; goto _again;} }} + break; + case 48: +#line 602 "Parser.rl" + {te = p+1;{ currentCls->add(CLASS_UCP_PC, negated); {cs = stack[--top]; goto _again;} }} + break; + case 49: +#line 603 "Parser.rl" + {te = p+1;{ currentCls->add(CLASS_UCP_PD, negated); {cs = stack[--top]; goto _again;} }} + break; + case 50: +#line 604 "Parser.rl" + {te = p+1;{ currentCls->add(CLASS_UCP_PE, negated); {cs = stack[--top]; goto _again;} }} + break; + case 51: +#line 605 "Parser.rl" + {te = p+1;{ currentCls->add(CLASS_UCP_PF, negated); {cs = stack[--top]; goto _again;} }} + break; + case 52: +#line 606 "Parser.rl" + {te = p+1;{ 
currentCls->add(CLASS_UCP_PI, negated); {cs = stack[--top]; goto _again;} }} + break; + case 53: +#line 607 "Parser.rl" + {te = p+1;{ currentCls->add(CLASS_UCP_PO, negated); {cs = stack[--top]; goto _again;} }} + break; + case 54: +#line 608 "Parser.rl" + {te = p+1;{ currentCls->add(CLASS_UCP_PS, negated); {cs = stack[--top]; goto _again;} }} + break; + case 55: +#line 610 "Parser.rl" + {te = p+1;{ currentCls->add(CLASS_UCP_SC, negated); {cs = stack[--top]; goto _again;} }} + break; + case 56: +#line 611 "Parser.rl" + {te = p+1;{ currentCls->add(CLASS_UCP_SK, negated); {cs = stack[--top]; goto _again;} }} + break; + case 57: +#line 612 "Parser.rl" + {te = p+1;{ currentCls->add(CLASS_UCP_SM, negated); {cs = stack[--top]; goto _again;} }} + break; + case 58: +#line 613 "Parser.rl" + {te = p+1;{ currentCls->add(CLASS_UCP_SO, negated); {cs = stack[--top]; goto _again;} }} + break; + case 59: +#line 615 "Parser.rl" + {te = p+1;{ currentCls->add(CLASS_UCP_ZL, negated); {cs = stack[--top]; goto _again;} }} + break; + case 60: +#line 616 "Parser.rl" + {te = p+1;{ currentCls->add(CLASS_UCP_ZP, negated); {cs = stack[--top]; goto _again;} }} + break; + case 61: +#line 617 "Parser.rl" + {te = p+1;{ currentCls->add(CLASS_UCP_ZS, negated); {cs = stack[--top]; goto _again;} }} + break; + case 62: +#line 618 "Parser.rl" + {te = p+1;{ currentCls->add(CLASS_UCP_XAN, negated); {cs = stack[--top]; goto _again;} }} + break; + case 63: +#line 619 "Parser.rl" + {te = p+1;{ currentCls->add(CLASS_UCP_XPS, negated); {cs = stack[--top]; goto _again;} }} + break; + case 64: +#line 620 "Parser.rl" + {te = p+1;{ currentCls->add(CLASS_UCP_XSP, negated); {cs = stack[--top]; goto _again;} }} + break; + case 65: +#line 621 "Parser.rl" + {te = p+1;{ currentCls->add(CLASS_UCP_XWD, negated); {cs = stack[--top]; goto _again;} }} + break; + case 66: +#line 622 "Parser.rl" + {te = p+1;{ currentCls->add(CLASS_SCRIPT_ARABIC, negated); {cs = stack[--top]; goto _again;} }} + break; + case 67: +#line 623 
"Parser.rl" + {te = p+1;{ currentCls->add(CLASS_SCRIPT_ARMENIAN, negated); {cs = stack[--top]; goto _again;} }} + break; + case 68: +#line 624 "Parser.rl" + {te = p+1;{ currentCls->add(CLASS_SCRIPT_AVESTAN, negated); {cs = stack[--top]; goto _again;} }} + break; + case 69: +#line 625 "Parser.rl" + {te = p+1;{ currentCls->add(CLASS_SCRIPT_BALINESE, negated); {cs = stack[--top]; goto _again;} }} + break; + case 70: +#line 626 "Parser.rl" + {te = p+1;{ currentCls->add(CLASS_SCRIPT_BAMUM, negated); {cs = stack[--top]; goto _again;} }} + break; + case 71: +#line 627 "Parser.rl" + {te = p+1;{ currentCls->add(CLASS_SCRIPT_BATAK, negated); {cs = stack[--top]; goto _again;} }} + break; + case 72: +#line 628 "Parser.rl" + {te = p+1;{ currentCls->add(CLASS_SCRIPT_BENGALI, negated); {cs = stack[--top]; goto _again;} }} + break; + case 73: +#line 629 "Parser.rl" + {te = p+1;{ currentCls->add(CLASS_SCRIPT_BOPOMOFO, negated); {cs = stack[--top]; goto _again;} }} + break; + case 74: +#line 630 "Parser.rl" + {te = p+1;{ currentCls->add(CLASS_SCRIPT_BRAHMI, negated); {cs = stack[--top]; goto _again;} }} + break; + case 75: +#line 631 "Parser.rl" + {te = p+1;{ currentCls->add(CLASS_SCRIPT_BRAILLE, negated); {cs = stack[--top]; goto _again;} }} + break; + case 76: +#line 632 "Parser.rl" + {te = p+1;{ currentCls->add(CLASS_SCRIPT_BUGINESE, negated); {cs = stack[--top]; goto _again;} }} + break; + case 77: +#line 633 "Parser.rl" + {te = p+1;{ currentCls->add(CLASS_SCRIPT_BUHID, negated); {cs = stack[--top]; goto _again;} }} + break; + case 78: +#line 634 "Parser.rl" + {te = p+1;{ currentCls->add(CLASS_SCRIPT_CANADIAN_ABORIGINAL, negated); {cs = stack[--top]; goto _again;} }} + break; + case 79: +#line 635 "Parser.rl" + {te = p+1;{ currentCls->add(CLASS_SCRIPT_CARIAN, negated); {cs = stack[--top]; goto _again;} }} + break; + case 80: +#line 636 "Parser.rl" + {te = p+1;{ currentCls->add(CLASS_SCRIPT_CHAM, negated); {cs = stack[--top]; goto _again;} }} + break; + case 81: +#line 637 
"Parser.rl" + {te = p+1;{ currentCls->add(CLASS_SCRIPT_CHEROKEE, negated); {cs = stack[--top]; goto _again;} }} + break; + case 82: +#line 638 "Parser.rl" + {te = p+1;{ currentCls->add(CLASS_SCRIPT_COMMON, negated); {cs = stack[--top]; goto _again;} }} + break; + case 83: +#line 639 "Parser.rl" + {te = p+1;{ currentCls->add(CLASS_SCRIPT_COPTIC, negated); {cs = stack[--top]; goto _again;} }} + break; + case 84: +#line 640 "Parser.rl" + {te = p+1;{ currentCls->add(CLASS_SCRIPT_CUNEIFORM, negated); {cs = stack[--top]; goto _again;} }} + break; + case 85: +#line 641 "Parser.rl" + {te = p+1;{ currentCls->add(CLASS_SCRIPT_CYPRIOT, negated); {cs = stack[--top]; goto _again;} }} + break; + case 86: +#line 642 "Parser.rl" + {te = p+1;{ currentCls->add(CLASS_SCRIPT_CYRILLIC, negated); {cs = stack[--top]; goto _again;} }} + break; + case 87: +#line 643 "Parser.rl" + {te = p+1;{ currentCls->add(CLASS_SCRIPT_DESERET, negated); {cs = stack[--top]; goto _again;} }} + break; + case 88: +#line 644 "Parser.rl" + {te = p+1;{ currentCls->add(CLASS_SCRIPT_DEVANAGARI, negated); {cs = stack[--top]; goto _again;} }} + break; + case 89: +#line 645 "Parser.rl" + {te = p+1;{ currentCls->add(CLASS_SCRIPT_EGYPTIAN_HIEROGLYPHS, negated); {cs = stack[--top]; goto _again;} }} + break; + case 90: +#line 646 "Parser.rl" + {te = p+1;{ currentCls->add(CLASS_SCRIPT_ETHIOPIC, negated); {cs = stack[--top]; goto _again;} }} + break; + case 91: +#line 647 "Parser.rl" + {te = p+1;{ currentCls->add(CLASS_SCRIPT_GEORGIAN, negated); {cs = stack[--top]; goto _again;} }} + break; + case 92: +#line 648 "Parser.rl" + {te = p+1;{ currentCls->add(CLASS_SCRIPT_GLAGOLITIC, negated); {cs = stack[--top]; goto _again;} }} + break; + case 93: +#line 649 "Parser.rl" + {te = p+1;{ currentCls->add(CLASS_SCRIPT_GOTHIC, negated); {cs = stack[--top]; goto _again;} }} + break; + case 94: +#line 650 "Parser.rl" + {te = p+1;{ currentCls->add(CLASS_SCRIPT_GREEK, negated); {cs = stack[--top]; goto _again;} }} + break; + case 95: 
+#line 651 "Parser.rl" + {te = p+1;{ currentCls->add(CLASS_SCRIPT_GUJARATI, negated); {cs = stack[--top]; goto _again;} }} + break; + case 96: +#line 652 "Parser.rl" + {te = p+1;{ currentCls->add(CLASS_SCRIPT_GURMUKHI, negated); {cs = stack[--top]; goto _again;} }} + break; + case 97: +#line 654 "Parser.rl" + {te = p+1;{ currentCls->add(CLASS_SCRIPT_HANGUL, negated); {cs = stack[--top]; goto _again;} }} + break; + case 98: +#line 655 "Parser.rl" + {te = p+1;{ currentCls->add(CLASS_SCRIPT_HANUNOO, negated); {cs = stack[--top]; goto _again;} }} + break; + case 99: +#line 656 "Parser.rl" + {te = p+1;{ currentCls->add(CLASS_SCRIPT_HEBREW, negated); {cs = stack[--top]; goto _again;} }} + break; + case 100: +#line 657 "Parser.rl" + {te = p+1;{ currentCls->add(CLASS_SCRIPT_HIRAGANA, negated); {cs = stack[--top]; goto _again;} }} + break; + case 101: +#line 658 "Parser.rl" + {te = p+1;{ currentCls->add(CLASS_SCRIPT_IMPERIAL_ARAMAIC, negated); {cs = stack[--top]; goto _again;} }} + break; + case 102: +#line 659 "Parser.rl" + {te = p+1;{ currentCls->add(CLASS_SCRIPT_INHERITED, negated); {cs = stack[--top]; goto _again;} }} + break; + case 103: +#line 660 "Parser.rl" + {te = p+1;{ currentCls->add(CLASS_SCRIPT_INSCRIPTIONAL_PAHLAVI, negated); {cs = stack[--top]; goto _again;} }} + break; + case 104: +#line 661 "Parser.rl" + {te = p+1;{ currentCls->add(CLASS_SCRIPT_INSCRIPTIONAL_PARTHIAN, negated); {cs = stack[--top]; goto _again;} }} + break; + case 105: +#line 662 "Parser.rl" + {te = p+1;{ currentCls->add(CLASS_SCRIPT_JAVANESE, negated); {cs = stack[--top]; goto _again;} }} + break; + case 106: +#line 663 "Parser.rl" + {te = p+1;{ currentCls->add(CLASS_SCRIPT_KAITHI, negated); {cs = stack[--top]; goto _again;} }} + break; + case 107: +#line 664 "Parser.rl" + {te = p+1;{ currentCls->add(CLASS_SCRIPT_KANNADA, negated); {cs = stack[--top]; goto _again;} }} + break; + case 108: +#line 665 "Parser.rl" + {te = p+1;{ currentCls->add(CLASS_SCRIPT_KATAKANA, negated); {cs = 
stack[--top]; goto _again;} }} + break; + case 109: +#line 666 "Parser.rl" + {te = p+1;{ currentCls->add(CLASS_SCRIPT_KAYAH_LI, negated); {cs = stack[--top]; goto _again;} }} + break; + case 110: +#line 667 "Parser.rl" + {te = p+1;{ currentCls->add(CLASS_SCRIPT_KHAROSHTHI, negated); {cs = stack[--top]; goto _again;} }} + break; + case 111: +#line 668 "Parser.rl" + {te = p+1;{ currentCls->add(CLASS_SCRIPT_KHMER, negated); {cs = stack[--top]; goto _again;} }} + break; + case 112: +#line 669 "Parser.rl" + {te = p+1;{ currentCls->add(CLASS_SCRIPT_LAO, negated); {cs = stack[--top]; goto _again;} }} + break; + case 113: +#line 670 "Parser.rl" + {te = p+1;{ currentCls->add(CLASS_SCRIPT_LATIN, negated); {cs = stack[--top]; goto _again;} }} + break; + case 114: +#line 671 "Parser.rl" + {te = p+1;{ currentCls->add(CLASS_SCRIPT_LEPCHA, negated); {cs = stack[--top]; goto _again;} }} + break; + case 115: +#line 672 "Parser.rl" + {te = p+1;{ currentCls->add(CLASS_SCRIPT_LIMBU, negated); {cs = stack[--top]; goto _again;} }} + break; + case 116: +#line 673 "Parser.rl" + {te = p+1;{ currentCls->add(CLASS_SCRIPT_LINEAR_B, negated); {cs = stack[--top]; goto _again;} }} + break; + case 117: +#line 674 "Parser.rl" + {te = p+1;{ currentCls->add(CLASS_SCRIPT_LISU, negated); {cs = stack[--top]; goto _again;} }} + break; + case 118: +#line 675 "Parser.rl" + {te = p+1;{ currentCls->add(CLASS_SCRIPT_LYCIAN, negated); {cs = stack[--top]; goto _again;} }} + break; + case 119: +#line 676 "Parser.rl" + {te = p+1;{ currentCls->add(CLASS_SCRIPT_LYDIAN, negated); {cs = stack[--top]; goto _again;} }} + break; + case 120: +#line 677 "Parser.rl" + {te = p+1;{ currentCls->add(CLASS_SCRIPT_MALAYALAM, negated); {cs = stack[--top]; goto _again;} }} + break; + case 121: +#line 678 "Parser.rl" + {te = p+1;{ currentCls->add(CLASS_SCRIPT_MANDAIC, negated); {cs = stack[--top]; goto _again;} }} + break; + case 122: +#line 679 "Parser.rl" + {te = p+1;{ currentCls->add(CLASS_SCRIPT_MEETEI_MAYEK, negated); {cs = 
stack[--top]; goto _again;} }} + break; + case 123: +#line 680 "Parser.rl" + {te = p+1;{ currentCls->add(CLASS_SCRIPT_MONGOLIAN, negated); {cs = stack[--top]; goto _again;} }} + break; + case 124: +#line 681 "Parser.rl" + {te = p+1;{ currentCls->add(CLASS_SCRIPT_MYANMAR, negated); {cs = stack[--top]; goto _again;} }} + break; + case 125: +#line 682 "Parser.rl" + {te = p+1;{ currentCls->add(CLASS_SCRIPT_NEW_TAI_LUE, negated); {cs = stack[--top]; goto _again;} }} + break; + case 126: +#line 683 "Parser.rl" + {te = p+1;{ currentCls->add(CLASS_SCRIPT_NKO, negated); {cs = stack[--top]; goto _again;} }} + break; + case 127: +#line 684 "Parser.rl" + {te = p+1;{ currentCls->add(CLASS_SCRIPT_OGHAM, negated); {cs = stack[--top]; goto _again;} }} + break; + case 128: +#line 685 "Parser.rl" + {te = p+1;{ currentCls->add(CLASS_SCRIPT_OL_CHIKI, negated); {cs = stack[--top]; goto _again;} }} + break; + case 129: +#line 686 "Parser.rl" + {te = p+1;{ currentCls->add(CLASS_SCRIPT_OLD_ITALIC, negated); {cs = stack[--top]; goto _again;} }} + break; + case 130: +#line 687 "Parser.rl" + {te = p+1;{ currentCls->add(CLASS_SCRIPT_OLD_PERSIAN, negated); {cs = stack[--top]; goto _again;} }} + break; + case 131: +#line 688 "Parser.rl" + {te = p+1;{ currentCls->add(CLASS_SCRIPT_OLD_SOUTH_ARABIAN, negated); {cs = stack[--top]; goto _again;} }} + break; + case 132: +#line 689 "Parser.rl" + {te = p+1;{ currentCls->add(CLASS_SCRIPT_OLD_TURKIC, negated); {cs = stack[--top]; goto _again;} }} + break; + case 133: +#line 690 "Parser.rl" + {te = p+1;{ currentCls->add(CLASS_SCRIPT_ORIYA, negated); {cs = stack[--top]; goto _again;} }} + break; + case 134: +#line 691 "Parser.rl" + {te = p+1;{ currentCls->add(CLASS_SCRIPT_OSMANYA, negated); {cs = stack[--top]; goto _again;} }} + break; + case 135: +#line 692 "Parser.rl" + {te = p+1;{ currentCls->add(CLASS_SCRIPT_PHAGS_PA, negated); {cs = stack[--top]; goto _again;} }} + break; + case 136: +#line 693 "Parser.rl" + {te = p+1;{ 
currentCls->add(CLASS_SCRIPT_PHOENICIAN, negated); {cs = stack[--top]; goto _again;} }} + break; + case 137: +#line 694 "Parser.rl" + {te = p+1;{ currentCls->add(CLASS_SCRIPT_REJANG, negated); {cs = stack[--top]; goto _again;} }} + break; + case 138: +#line 695 "Parser.rl" + {te = p+1;{ currentCls->add(CLASS_SCRIPT_RUNIC, negated); {cs = stack[--top]; goto _again;} }} + break; + case 139: +#line 696 "Parser.rl" + {te = p+1;{ currentCls->add(CLASS_SCRIPT_SAMARITAN, negated); {cs = stack[--top]; goto _again;} }} + break; + case 140: +#line 697 "Parser.rl" + {te = p+1;{ currentCls->add(CLASS_SCRIPT_SAURASHTRA, negated); {cs = stack[--top]; goto _again;} }} + break; + case 141: +#line 698 "Parser.rl" + {te = p+1;{ currentCls->add(CLASS_SCRIPT_SHAVIAN, negated); {cs = stack[--top]; goto _again;} }} + break; + case 142: +#line 699 "Parser.rl" + {te = p+1;{ currentCls->add(CLASS_SCRIPT_SINHALA, negated); {cs = stack[--top]; goto _again;} }} + break; + case 143: +#line 700 "Parser.rl" + {te = p+1;{ currentCls->add(CLASS_SCRIPT_SUNDANESE, negated); {cs = stack[--top]; goto _again;} }} + break; + case 144: +#line 701 "Parser.rl" + {te = p+1;{ currentCls->add(CLASS_SCRIPT_SYLOTI_NAGRI, negated); {cs = stack[--top]; goto _again;} }} + break; + case 145: +#line 702 "Parser.rl" + {te = p+1;{ currentCls->add(CLASS_SCRIPT_SYRIAC, negated); {cs = stack[--top]; goto _again;} }} + break; + case 146: +#line 703 "Parser.rl" + {te = p+1;{ currentCls->add(CLASS_SCRIPT_TAGALOG, negated); {cs = stack[--top]; goto _again;} }} + break; + case 147: +#line 704 "Parser.rl" + {te = p+1;{ currentCls->add(CLASS_SCRIPT_TAGBANWA, negated); {cs = stack[--top]; goto _again;} }} + break; + case 148: +#line 705 "Parser.rl" + {te = p+1;{ currentCls->add(CLASS_SCRIPT_TAI_LE, negated); {cs = stack[--top]; goto _again;} }} + break; + case 149: +#line 706 "Parser.rl" + {te = p+1;{ currentCls->add(CLASS_SCRIPT_TAI_THAM, negated); {cs = stack[--top]; goto _again;} }} + break; + case 150: +#line 707 "Parser.rl" 
+ {te = p+1;{ currentCls->add(CLASS_SCRIPT_TAI_VIET, negated); {cs = stack[--top]; goto _again;} }} + break; + case 151: +#line 708 "Parser.rl" + {te = p+1;{ currentCls->add(CLASS_SCRIPT_TAMIL, negated); {cs = stack[--top]; goto _again;} }} + break; + case 152: +#line 709 "Parser.rl" + {te = p+1;{ currentCls->add(CLASS_SCRIPT_TELUGU, negated); {cs = stack[--top]; goto _again;} }} + break; + case 153: +#line 710 "Parser.rl" + {te = p+1;{ currentCls->add(CLASS_SCRIPT_THAANA, negated); {cs = stack[--top]; goto _again;} }} + break; + case 154: +#line 711 "Parser.rl" + {te = p+1;{ currentCls->add(CLASS_SCRIPT_THAI, negated); {cs = stack[--top]; goto _again;} }} + break; + case 155: +#line 712 "Parser.rl" + {te = p+1;{ currentCls->add(CLASS_SCRIPT_TIBETAN, negated); {cs = stack[--top]; goto _again;} }} + break; + case 156: +#line 713 "Parser.rl" + {te = p+1;{ currentCls->add(CLASS_SCRIPT_TIFINAGH, negated); {cs = stack[--top]; goto _again;} }} + break; + case 157: +#line 714 "Parser.rl" + {te = p+1;{ currentCls->add(CLASS_SCRIPT_UGARITIC, negated); {cs = stack[--top]; goto _again;} }} + break; + case 158: +#line 715 "Parser.rl" + {te = p+1;{ currentCls->add(CLASS_SCRIPT_VAI, negated); {cs = stack[--top]; goto _again;} }} + break; + case 159: +#line 716 "Parser.rl" + {te = p+1;{ currentCls->add(CLASS_SCRIPT_YI, negated); {cs = stack[--top]; goto _again;} }} + break; + case 160: +#line 717 "Parser.rl" + {te = p+1;{ currentCls->add(CLASS_UCP_ANY, negated); {cs = stack[--top]; goto _again;} }} + break; + case 161: +#line 718 "Parser.rl" + {te = p+1;{ throw LocatedParseError("Unknown property"); }} + break; + case 162: +#line 580 "Parser.rl" + {te = p;p--;{ currentCls->add(CLASS_UCP_C, negated); {cs = stack[--top]; goto _again;} }} + break; + case 163: +#line 584 "Parser.rl" + {te = p;p--;{ currentCls->add(CLASS_UCP_CO, negated); {cs = stack[--top]; goto _again;} }} + break; + case 164: +#line 586 "Parser.rl" + {te = p;p--;{ currentCls->add(CLASS_UCP_L, negated); {cs = 
stack[--top]; goto _again;} }} + break; + case 165: +#line 593 "Parser.rl" + {te = p;p--;{ currentCls->add(CLASS_UCP_M, negated); {cs = stack[--top]; goto _again;} }} + break; + case 166: +#line 595 "Parser.rl" + {te = p;p--;{ currentCls->add(CLASS_UCP_ME, negated); {cs = stack[--top]; goto _again;} }} + break; + case 167: +#line 597 "Parser.rl" + {te = p;p--;{ currentCls->add(CLASS_UCP_N, negated); {cs = stack[--top]; goto _again;} }} + break; + case 168: +#line 601 "Parser.rl" + {te = p;p--;{ currentCls->add(CLASS_UCP_P, negated); {cs = stack[--top]; goto _again;} }} + break; + case 169: +#line 609 "Parser.rl" + {te = p;p--;{ currentCls->add(CLASS_UCP_S, negated); {cs = stack[--top]; goto _again;} }} + break; + case 170: +#line 614 "Parser.rl" + {te = p;p--;{ currentCls->add(CLASS_UCP_Z, negated); {cs = stack[--top]; goto _again;} }} + break; + case 171: +#line 653 "Parser.rl" + {te = p;p--;{ currentCls->add(CLASS_SCRIPT_HAN, negated); {cs = stack[--top]; goto _again;} }} + break; + case 172: +#line 718 "Parser.rl" + {te = p;p--;{ throw LocatedParseError("Unknown property"); }} + break; + case 173: +#line 580 "Parser.rl" + {{p = ((te))-1;}{ currentCls->add(CLASS_UCP_C, negated); {cs = stack[--top]; goto _again;} }} + break; + case 174: +#line 584 "Parser.rl" + {{p = ((te))-1;}{ currentCls->add(CLASS_UCP_CO, negated); {cs = stack[--top]; goto _again;} }} + break; + case 175: +#line 586 "Parser.rl" + {{p = ((te))-1;}{ currentCls->add(CLASS_UCP_L, negated); {cs = stack[--top]; goto _again;} }} + break; + case 176: +#line 593 "Parser.rl" + {{p = ((te))-1;}{ currentCls->add(CLASS_UCP_M, negated); {cs = stack[--top]; goto _again;} }} + break; + case 177: +#line 595 "Parser.rl" + {{p = ((te))-1;}{ currentCls->add(CLASS_UCP_ME, negated); {cs = stack[--top]; goto _again;} }} + break; + case 178: +#line 597 "Parser.rl" + {{p = ((te))-1;}{ currentCls->add(CLASS_UCP_N, negated); {cs = stack[--top]; goto _again;} }} + break; + case 179: +#line 601 "Parser.rl" + {{p = 
((te))-1;}{ currentCls->add(CLASS_UCP_P, negated); {cs = stack[--top]; goto _again;} }} + break; + case 180: +#line 609 "Parser.rl" + {{p = ((te))-1;}{ currentCls->add(CLASS_UCP_S, negated); {cs = stack[--top]; goto _again;} }} + break; + case 181: +#line 653 "Parser.rl" + {{p = ((te))-1;}{ currentCls->add(CLASS_SCRIPT_HAN, negated); {cs = stack[--top]; goto _again;} }} + break; + case 182: +#line 718 "Parser.rl" + {{p = ((te))-1;}{ throw LocatedParseError("Unknown property"); }} + break; + case 183: +#line 733 "Parser.rl" + {te = p+1;{ + currentCls->add(CLASS_UCP_C, negated); + if (!inCharClass) { + currentCls->finalize(); + currentSeq->addComponent(move(currentCls)); + } + {cs = stack[--top]; goto _again;} + }} + break; + case 184: +#line 741 "Parser.rl" + {te = p+1;{ + currentCls->add(CLASS_UCP_L, negated); + if (!inCharClass) { + currentCls->finalize(); + currentSeq->addComponent(move(currentCls)); + } + {cs = stack[--top]; goto _again;} + }} + break; + case 185: +#line 749 "Parser.rl" + {te = p+1;{ + currentCls->add(CLASS_UCP_M, negated); + if (!inCharClass) { + currentCls->finalize(); + currentSeq->addComponent(move(currentCls)); + } + {cs = stack[--top]; goto _again;} + }} + break; + case 186: +#line 757 "Parser.rl" + {te = p+1;{ + currentCls->add(CLASS_UCP_N, negated); + if (!inCharClass) { + currentCls->finalize(); + currentSeq->addComponent(move(currentCls)); + } + {cs = stack[--top]; goto _again;} + }} + break; + case 187: +#line 765 "Parser.rl" + {te = p+1;{ + currentCls->add(CLASS_UCP_P, negated); + if (!inCharClass) { + currentCls->finalize(); + currentSeq->addComponent(move(currentCls)); + } + {cs = stack[--top]; goto _again;} + }} + break; + case 188: +#line 773 "Parser.rl" + {te = p+1;{ + currentCls->add(CLASS_UCP_S, negated); + if (!inCharClass) { + currentCls->finalize(); + currentSeq->addComponent(move(currentCls)); + } + {cs = stack[--top]; goto _again;} + }} + break; + case 189: +#line 781 "Parser.rl" + {te = p+1;{ + 
currentCls->add(CLASS_UCP_Z, negated); + if (!inCharClass) { + currentCls->finalize(); + currentSeq->addComponent(move(currentCls)); + } + {cs = stack[--top]; goto _again;} + }} + break; + case 190: +#line 790 "Parser.rl" + {te = p+1;{ throw LocatedParseError("Unknown property"); }} + break; + case 191: +#line 796 "Parser.rl" + {te = p+1;{ + throw LocatedParseError("Unsupported POSIX collating " + "element"); + }} + break; + case 192: +#line 803 "Parser.rl" + {te = p+1;{ + currentCls->add(CLASS_ALNUM, false); + }} + break; + case 193: +#line 806 "Parser.rl" + {te = p+1;{ + currentCls->add(CLASS_ALNUM, true); + }} + break; + case 194: +#line 809 "Parser.rl" + {te = p+1;{ + currentCls->add(CLASS_ALPHA, false); + }} + break; + case 195: +#line 812 "Parser.rl" + {te = p+1;{ + currentCls->add(CLASS_ALPHA, true); + }} + break; + case 196: +#line 815 "Parser.rl" + {te = p+1;{ + currentCls->add(CLASS_ASCII, false); + }} + break; + case 197: +#line 818 "Parser.rl" + {te = p+1;{ + currentCls->add(CLASS_ASCII, true); + }} + break; + case 198: +#line 821 "Parser.rl" + {te = p+1;{ + currentCls->add(CLASS_BLANK, false); + }} + break; + case 199: +#line 824 "Parser.rl" + {te = p+1;{ + currentCls->add(CLASS_BLANK, true); + }} + break; + case 200: +#line 827 "Parser.rl" + {te = p+1;{ + currentCls->add(CLASS_CNTRL, false); + }} + break; + case 201: +#line 830 "Parser.rl" + {te = p+1;{ + currentCls->add(CLASS_CNTRL, true); + }} + break; + case 202: +#line 833 "Parser.rl" + {te = p+1;{ + currentCls->add(CLASS_DIGIT, false); + }} + break; + case 203: +#line 836 "Parser.rl" + {te = p+1;{ + currentCls->add(CLASS_DIGIT, true); + }} + break; + case 204: +#line 839 "Parser.rl" + {te = p+1;{ + currentCls->add(CLASS_GRAPH, false); + }} + break; + case 205: +#line 842 "Parser.rl" + {te = p+1;{ + currentCls->add(CLASS_GRAPH, true); + }} + break; + case 206: +#line 845 "Parser.rl" + {te = p+1;{ + currentCls->add(CLASS_LOWER, false); + }} + break; + case 207: +#line 848 "Parser.rl" + {te = p+1;{ 
+ currentCls->add(CLASS_LOWER, true); + }} + break; + case 208: +#line 851 "Parser.rl" + {te = p+1;{ + currentCls->add(CLASS_PRINT, false); + }} + break; + case 209: +#line 854 "Parser.rl" + {te = p+1;{ + currentCls->add(CLASS_PRINT, true); + }} + break; + case 210: +#line 857 "Parser.rl" + {te = p+1;{ + currentCls->add(CLASS_PUNCT, false); + }} + break; + case 211: +#line 860 "Parser.rl" + {te = p+1;{ + currentCls->add(CLASS_PUNCT, true); + }} + break; + case 212: +#line 864 "Parser.rl" + {te = p+1;{ + currentCls->add(CLASS_SPACE, false); + }} + break; + case 213: +#line 867 "Parser.rl" + {te = p+1;{ + currentCls->add(CLASS_SPACE, true); + }} + break; + case 214: +#line 870 "Parser.rl" + {te = p+1;{ + currentCls->add(CLASS_UPPER, false); + }} + break; + case 215: +#line 873 "Parser.rl" + {te = p+1;{ + currentCls->add(CLASS_UPPER, true); + }} + break; + case 216: +#line 876 "Parser.rl" + {te = p+1;{ + currentCls->add(CLASS_WORD, false); + }} + break; + case 217: +#line 879 "Parser.rl" + {te = p+1;{ + currentCls->add(CLASS_WORD, true); + }} + break; + case 218: +#line 882 "Parser.rl" + {te = p+1;{ + currentCls->add(CLASS_XDIGIT, false); + }} + break; + case 219: +#line 885 "Parser.rl" + {te = p+1;{ + currentCls->add(CLASS_XDIGIT, true); + }} + break; + case 220: +#line 890 "Parser.rl" + {te = p+1;{ + throw LocatedParseError("Invalid POSIX named class"); + }} + break; + case 221: +#line 893 "Parser.rl" + {te = p+1;{ + { + DEBUG_PRINTF("stack %zu top %d\n", stack.size(), top); + if ((int)stack.size() == top) { + stack.resize(2 * (top + 1)); + } + {stack[top++] = cs; cs = 843;goto _again;}} + }} + break; + case 222: +#line 896 "Parser.rl" + {te = p+1;{ /*noop*/}} + break; + case 223: +#line 898 "Parser.rl" + {te = p+1;{ + currentCls->add('\x08'); + }} + break; + case 224: +#line 902 "Parser.rl" + {te = p+1;{ + currentCls->add('\x09'); + }} + break; + case 225: +#line 906 "Parser.rl" + {te = p+1;{ + currentCls->add('\x0a'); + }} + break; + case 226: +#line 910 
"Parser.rl" + {te = p+1;{ + currentCls->add('\x0d'); + }} + break; + case 227: +#line 914 "Parser.rl" + {te = p+1;{ + currentCls->add('\x0c'); + }} + break; + case 228: +#line 918 "Parser.rl" + {te = p+1;{ + currentCls->add('\x07'); + }} + break; + case 229: +#line 922 "Parser.rl" + {te = p+1;{ + currentCls->add('\x1b'); + }} + break; + case 230: +#line 926 "Parser.rl" + {te = p+1;{ + currentCls->add(CLASS_HORZ, false); + }} + break; + case 231: +#line 930 "Parser.rl" + {te = p+1;{ + currentCls->add(CLASS_HORZ, true); + }} + break; + case 232: +#line 934 "Parser.rl" + {te = p+1;{ + currentCls->add(CLASS_VERT, false); + }} + break; + case 233: +#line 938 "Parser.rl" + {te = p+1;{ + currentCls->add(CLASS_VERT, true); + }} + break; + case 234: +#line 942 "Parser.rl" + {te = p+1;{ + negated = false; + p--; + { + DEBUG_PRINTF("stack %zu top %d\n", stack.size(), top); + if ((int)stack.size() == top) { + stack.resize(2 * (top + 1)); + } + {stack[top++] = cs; cs = 559;goto _again;}} + }} + break; + case 235: +#line 948 "Parser.rl" + {te = p+1;{ + negated = false; + p--; + { + DEBUG_PRINTF("stack %zu top %d\n", stack.size(), top); + if ((int)stack.size() == top) { + stack.resize(2 * (top + 1)); + } + {stack[top++] = cs; cs = 818;goto _again;}} + }} + break; + case 236: +#line 954 "Parser.rl" + {te = p+1;{ + negated = true; + p--; + { + DEBUG_PRINTF("stack %zu top %d\n", stack.size(), top); + if ((int)stack.size() == top) { + stack.resize(2 * (top + 1)); + } + {stack[top++] = cs; cs = 559;goto _again;}} + }} + break; + case 237: +#line 960 "Parser.rl" + {te = p+1;{ + negated = true; + p--; + { + DEBUG_PRINTF("stack %zu top %d\n", stack.size(), top); + if ((int)stack.size() == top) { + stack.resize(2 * (top + 1)); + } + {stack[top++] = cs; cs = 818;goto _again;}} + }} + break; + case 238: +#line 970 "Parser.rl" + {te = p+1;{ + currentCls->add(octAccumulator); + }} + break; + case 239: +#line 973 "Parser.rl" + {te = p+1;{ + currentCls->add(octAccumulator); + }} + break; + case 
240: +#line 977 "Parser.rl" + {te = p+1;{ + string oct(ts + 3, te - ts - 4); + unsigned long val; + try { + val = stoul(oct, nullptr, 8); + } catch (const std::out_of_range &) { + val = MAX_UNICODE + 1; + } + if ((!mode.utf8 && val > 255) || val > MAX_UNICODE) { + throw LocatedParseError("Value in \\o{...} sequence is too large"); + } + currentCls->add((unichar)val); + }} + break; + case 241: +#line 997 "Parser.rl" + {te = p+1;{ + currentCls->add(accumulator); + }} + break; + case 242: +#line 1001 "Parser.rl" + {te = p+1;{ + // whatever we found here + currentCls->add(*(ts + 1)); + + }} + break; + case 243: +#line 1007 "Parser.rl" + {te = p+1;{ + string hex(ts + 3, te - ts - 4); + unsigned long val; + try { + val = stoul(hex, nullptr, 16); + } catch (const std::out_of_range &) { + val = MAX_UNICODE + 1; + } + if (val > MAX_UNICODE) { + throw LocatedParseError("Value in \\x{...} sequence is too large"); + } + currentCls->add((unichar)val); + }} + break; + case 244: +#line 1025 "Parser.rl" + {te = p+1;{ + if (te - ts < 3) { + assert(te - ts == 2); + throw LocatedParseError(SLASH_C_ERROR); + } else { + assert(te - ts == 3); + currentCls->add(decodeCtrl(ts[2])); + } + }} + break; + case 245: +#line 1035 "Parser.rl" + {te = p+1;{ + currentCls->add(CLASS_WORD, false); + }} + break; + case 246: +#line 1039 "Parser.rl" + {te = p+1;{ + currentCls->add(CLASS_WORD, true); + }} + break; + case 247: +#line 1043 "Parser.rl" + {te = p+1;{ + currentCls->add(CLASS_SPACE, false); + }} + break; + case 248: +#line 1047 "Parser.rl" + {te = p+1;{ + currentCls->add(CLASS_SPACE, true); + }} + break; + case 249: +#line 1051 "Parser.rl" + {te = p+1;{ + currentCls->add(CLASS_DIGIT, false); + }} + break; + case 250: +#line 1055 "Parser.rl" + {te = p+1;{ + currentCls->add(CLASS_DIGIT, true); + }} + break; + case 251: +#line 1058 "Parser.rl" + {te = p+1;{ + currentCls->addDash(); + }} + break; + case 252: +#line 276 "Parser.rl" + {te = p+1;{ + ostringstream str; + str << "'\\" << *(ts + 1) << 
"' at index " << ts - ptr + << " not supported in a character class."; + throw ParseError(str.str()); + }} + break; + case 253: +#line 276 "Parser.rl" + {te = p+1;{ + ostringstream str; + str << "'\\" << *(ts + 1) << "' at index " << ts - ptr + << " not supported in a character class."; + throw ParseError(str.str()); + }} + break; + case 254: +#line 276 "Parser.rl" + {te = p+1;{ + ostringstream str; + str << "'\\" << *(ts + 1) << "' at index " << ts - ptr + << " not supported in a character class."; + throw ParseError(str.str()); + }} + break; + case 255: +#line 1075 "Parser.rl" + {te = p+1;{ + // add the literal char + currentCls->add(*(ts + 1)); + }} + break; + case 256: +#line 1081 "Parser.rl" + {te = p+1;{ + assert(mode.utf8); + currentCls->add(readUtf8CodePoint2c(ts)); + }} + break; + case 257: +#line 1086 "Parser.rl" + {te = p+1;{ + assert(mode.utf8); + currentCls->add(readUtf8CodePoint3c(ts)); + }} + break; + case 258: +#line 1091 "Parser.rl" + {te = p+1;{ + assert(mode.utf8); + currentCls->add(readUtf8CodePoint4c(ts)); + }} + break; + case 259: +#line 1096 "Parser.rl" + {te = p+1;{ + assert(mode.utf8); + throwInvalidUtf8(); + }} + break; + case 260: +#line 1102 "Parser.rl" + {te = p+1;{ + currentCls->add((u8)*ts); + }} + break; + case 261: +#line 1106 "Parser.rl" + {te = p+1;{ + currentCls->finalize(); + currentSeq->addComponent(move(currentCls)); + inCharClass = false; + {cs = 746;goto _again;} + }} + break; + case 262: +#line 966 "Parser.rl" + {te = p;p--;{ throw LocatedParseError("Malformed property"); }} + break; + case 263: +#line 967 "Parser.rl" + {te = p;p--;{ throw LocatedParseError("Malformed property"); }} + break; + case 264: +#line 970 "Parser.rl" + {te = p;p--;{ + currentCls->add(octAccumulator); + }} + break; + case 265: +#line 973 "Parser.rl" + {te = p;p--;{ + currentCls->add(octAccumulator); + }} + break; + case 266: +#line 992 "Parser.rl" + {te = p;p--;{ + throw LocatedParseError("Value in \\o{...} sequence is non-octal or missing braces"); 
+ }} + break; + case 267: +#line 997 "Parser.rl" + {te = p;p--;{ + currentCls->add(accumulator); + }} + break; + case 268: +#line 1021 "Parser.rl" + {te = p;p--;{ + throw LocatedParseError("Value in \\x{...} sequence is non-hex or missing }"); + }} + break; + case 269: +#line 1025 "Parser.rl" + {te = p;p--;{ + if (te - ts < 3) { + assert(te - ts == 2); + throw LocatedParseError(SLASH_C_ERROR); + } else { + assert(te - ts == 3); + currentCls->add(decodeCtrl(ts[2])); + } + }} + break; + case 270: +#line 1096 "Parser.rl" + {te = p;p--;{ + assert(mode.utf8); + throwInvalidUtf8(); + }} + break; + case 271: +#line 1102 "Parser.rl" + {te = p;p--;{ + currentCls->add((u8)*ts); + }} + break; + case 272: +#line 992 "Parser.rl" + {{p = ((te))-1;}{ + throw LocatedParseError("Value in \\o{...} sequence is non-octal or missing braces"); + }} + break; + case 273: +#line 1021 "Parser.rl" + {{p = ((te))-1;}{ + throw LocatedParseError("Value in \\x{...} sequence is non-hex or missing }"); + }} + break; + case 274: +#line 1096 "Parser.rl" + {{p = ((te))-1;}{ + assert(mode.utf8); + throwInvalidUtf8(); + }} + break; + case 275: +#line 1102 "Parser.rl" + {{p = ((te))-1;}{ + currentCls->add((u8)*ts); + }} + break; + case 276: +#line 1120 "Parser.rl" + {te = p+1;{ + if (currentCls->isNegated()) { + // Already seen a caret; the second one is not a meta-character. + inCharClassEarly = false; + p--; {cs = 819;goto _again;} + } else { + currentCls->negate(); + // Note: we cannot switch off inCharClassEarly here, as /[^]]/ + // needs to use the right square bracket path below. 
+ } + }} + break; + case 277: +#line 1133 "Parser.rl" + {te = p+1;{ + currentCls->add(']'); + inCharClassEarly = false; + }} + break; + case 278: +#line 1138 "Parser.rl" + {te = p+1;{ { + DEBUG_PRINTF("stack %zu top %d\n", stack.size(), top); + if ((int)stack.size() == top) { + stack.resize(2 * (top + 1)); + } + {stack[top++] = cs; cs = 843;goto _again;}} }} + break; + case 279: +#line 1139 "Parser.rl" + {te = p+1;{ /*noop*/}} + break; + case 280: +#line 1142 "Parser.rl" + {te = p+1;{ + inCharClassEarly = false; + p--; + {cs = 819;goto _again;} + }} + break; + case 281: +#line 1142 "Parser.rl" + {te = p;p--;{ + inCharClassEarly = false; + p--; + {cs = 819;goto _again;} + }} + break; + case 282: +#line 1154 "Parser.rl" + {te = p+1;{ + {cs = 746;goto _again;} + }} + break; + case 283: +#line 1159 "Parser.rl" + {te = p+1;{ + assert(mode.utf8); + /* leverage ComponentClass to generate the vertices */ + auto cc = getComponentClass(mode); + cc->add(readUtf8CodePoint2c(ts)); + cc->finalize(); + currentSeq->addComponent(move(cc)); + }} + break; + case 284: +#line 1168 "Parser.rl" + {te = p+1;{ + assert(mode.utf8); + /* leverage ComponentClass to generate the vertices */ + auto cc = getComponentClass(mode); + cc->add(readUtf8CodePoint3c(ts)); + cc->finalize(); + currentSeq->addComponent(move(cc)); + }} + break; + case 285: +#line 1177 "Parser.rl" + {te = p+1;{ + assert(mode.utf8); + /* leverage ComponentClass to generate the vertices */ + auto cc = getComponentClass(mode); + cc->add(readUtf8CodePoint4c(ts)); + cc->finalize(); + currentSeq->addComponent(move(cc)); + }} + break; + case 286: +#line 1186 "Parser.rl" + {te = p+1;{ + assert(mode.utf8); + throwInvalidUtf8(); + }} + break; + case 287: +#line 1192 "Parser.rl" + {te = p+1;{ + addLiteral(currentSeq, *ts, mode); + }} + break; + case 288: +#line 1186 "Parser.rl" + {te = p;p--;{ + assert(mode.utf8); + throwInvalidUtf8(); + }} + break; + case 289: +#line 1192 "Parser.rl" + {te = p;p--;{ + addLiteral(currentSeq, *ts, 
mode); + }} + break; + case 290: +#line 1186 "Parser.rl" + {{p = ((te))-1;}{ + assert(mode.utf8); + throwInvalidUtf8(); + }} + break; + case 291: +#line 1202 "Parser.rl" + {te = p+1;{ + {cs = stack[--top]; goto _again;} + }} + break; + case 292: +#line 1207 "Parser.rl" + {te = p+1;{ + assert(mode.utf8); + currentCls->add(readUtf8CodePoint2c(ts)); + inCharClassEarly = false; + }} + break; + case 293: +#line 1213 "Parser.rl" + {te = p+1;{ + assert(mode.utf8); + currentCls->add(readUtf8CodePoint3c(ts)); + inCharClassEarly = false; + }} + break; + case 294: +#line 1219 "Parser.rl" + {te = p+1;{ + assert(mode.utf8); + currentCls->add(readUtf8CodePoint4c(ts)); + inCharClassEarly = false; + }} + break; + case 295: +#line 1225 "Parser.rl" + {te = p+1;{ + assert(mode.utf8); + throwInvalidUtf8(); + }} + break; + case 296: +#line 1231 "Parser.rl" + {te = p+1;{ + currentCls->add(*ts); + inCharClassEarly = false; + }} + break; + case 297: +#line 1225 "Parser.rl" + {te = p;p--;{ + assert(mode.utf8); + throwInvalidUtf8(); + }} + break; + case 298: +#line 1231 "Parser.rl" + {te = p;p--;{ + currentCls->add(*ts); + inCharClassEarly = false; + }} + break; + case 299: +#line 1225 "Parser.rl" + {{p = ((te))-1;}{ + assert(mode.utf8); + throwInvalidUtf8(); + }} + break; + case 300: +#line 1243 "Parser.rl" + {te = p+1;{ inComment = false; {cs = 746;goto _again;} }} + break; + case 301: +#line 1247 "Parser.rl" + {te = p+1;} + break; + case 302: +#line 1255 "Parser.rl" + {te = p+1;{ inComment = false; {cs = 746;goto _again;} }} + break; + case 303: +#line 1259 "Parser.rl" + {te = p+1;} + break; + case 304: +#line 1491 "Parser.rl" + {act = 288;} + break; + case 305: +#line 1508 "Parser.rl" + {act = 290;} + break; + case 306: +#line 1737 "Parser.rl" + {act = 330;} + break; + case 307: +#line 362 "Parser.rl" + {te = p+1;{ + if (sequences.empty()) { + throw LocatedParseError("Unmatched parentheses"); + } + currentSeq->finalize(); + POP_SEQUENCE; + }} + break; + case 308: +#line 1274 "Parser.rl" 
+ {te = p+1;{ + currentSeq->addAlternation(); + }} + break; + case 309: +#line 1279 "Parser.rl" + {te = p+1;{ + throw LocatedParseError("POSIX named classes are only " + "supported inside a class"); + }} + break; + case 310: +#line 1286 "Parser.rl" + {te = p+1;{ + throw LocatedParseError("Unsupported POSIX collating " + "element"); + }} + break; + case 311: +#line 1293 "Parser.rl" + {te = p+1;{ + {cs = 838;goto _again;} + }} + break; + case 312: +#line 1297 "Parser.rl" + {te = p+1;{ /* noop */ }} + break; + case 313: +#line 1299 "Parser.rl" + {te = p+1;{ + currentSeq->addComponent(generateComponent(CLASS_ANY, false, mode)); + }} + break; + case 314: +#line 1303 "Parser.rl" + {te = p+1;{ + if (mode.utf8) { + throw LocatedParseError("\\C is unsupported in UTF8"); + } + currentSeq->addComponent(std::make_unique()); + }} + break; + case 315: +#line 1317 "Parser.rl" + {te = p+1;{ + if (!currentSeq->addRepeat(0, ComponentRepeat::NoLimit, + ComponentRepeat::REPEAT_NONGREEDY)) { + throwInvalidRepeat(); + } + }} + break; + case 316: +#line 1324 "Parser.rl" + {te = p+1;{ + if (!currentSeq->addRepeat(0, ComponentRepeat::NoLimit, + ComponentRepeat::REPEAT_POSSESSIVE)) { + throwInvalidRepeat(); + } + }} + break; + case 317: +#line 1338 "Parser.rl" + {te = p+1;{ + if (!currentSeq->addRepeat(1, ComponentRepeat::NoLimit, + ComponentRepeat::REPEAT_NONGREEDY)) { + throwInvalidRepeat(); + } + }} + break; + case 318: +#line 1345 "Parser.rl" + {te = p+1;{ + if (!currentSeq->addRepeat(1, ComponentRepeat::NoLimit, + ComponentRepeat::REPEAT_POSSESSIVE)) { + throwInvalidRepeat(); + } + }} + break; + case 319: +#line 1359 "Parser.rl" + {te = p+1;{ + if (!currentSeq->addRepeat( + 0, 1, ComponentRepeat::REPEAT_NONGREEDY)) { + throwInvalidRepeat(); + } + }} + break; + case 320: +#line 1366 "Parser.rl" + {te = p+1;{ + if (!currentSeq->addRepeat( + 0, 1, ComponentRepeat::REPEAT_POSSESSIVE)) { + throwInvalidRepeat(); + } + }} + break; + case 321: +#line 1383 "Parser.rl" + {te = p+1;{ + if 
(repeatN > repeatM || repeatM == 0) { + throwInvalidRepeat(); + } else if (!currentSeq->addRepeat( + repeatN, repeatM, + ComponentRepeat::REPEAT_NONGREEDY)) { + throwInvalidRepeat(); + } + }} + break; + case 322: +#line 1393 "Parser.rl" + {te = p+1;{ + if (repeatN > repeatM || repeatM == 0) { + throwInvalidRepeat(); + } else if (!currentSeq->addRepeat( + repeatN, repeatM, + ComponentRepeat::REPEAT_POSSESSIVE)) { + throwInvalidRepeat(); + } + }} + break; + case 323: +#line 322 "Parser.rl" + {te = p+1;{ + inComment = true; + {cs = 849;goto _again;} + }} + break; + case 324: +#line 1410 "Parser.rl" + {te = p+1;{ p--; { + DEBUG_PRINTF("stack %zu top %d\n", stack.size(), top); + if ((int)stack.size() == top) { + stack.resize(2 * (top + 1)); + } + {stack[top++] = cs; cs = 787;goto _again;}} }} + break; + case 325: +#line 1414 "Parser.rl" + {te = p+1;{ assert(0); {p++; goto _out; } }} + break; + case 326: +#line 1421 "Parser.rl" + {te = p+1;{ + auto bound = mode.multiline ? ComponentBoundary::BEGIN_LINE + : ComponentBoundary::BEGIN_STRING; + currentSeq->addComponent(std::make_unique(bound)); + }} + break; + case 327: +#line 1428 "Parser.rl" + {te = p+1;{ + auto bound = mode.multiline ? 
ComponentBoundary::END_LINE + : ComponentBoundary::END_STRING_OPTIONAL_LF; + currentSeq->addComponent(std::make_unique(bound)); + }} + break; + case 328: +#line 1434 "Parser.rl" + {te = p+1;{ + auto bound = ComponentBoundary::BEGIN_STRING; + currentSeq->addComponent(std::make_unique(bound)); + }} + break; + case 329: +#line 1439 "Parser.rl" + {te = p+1;{ + auto bound = ComponentBoundary::END_STRING_OPTIONAL_LF; + currentSeq->addComponent(std::make_unique(bound)); + }} + break; + case 330: +#line 1444 "Parser.rl" + {te = p+1;{ + auto bound = ComponentBoundary::END_STRING; + currentSeq->addComponent(std::make_unique(bound)); + }} + break; + case 331: +#line 1449 "Parser.rl" + {te = p+1;{ + currentSeq->addComponent( + std::make_unique(ts - ptr, false, mode)); + }} + break; + case 332: +#line 1454 "Parser.rl" + {te = p+1;{ + currentSeq->addComponent( + std::make_unique(ts - ptr, true, mode)); + }} + break; + case 333: +#line 1464 "Parser.rl" + {te = p+1;{ + addLiteral(currentSeq, '\x09', mode); + }} + break; + case 334: +#line 1468 "Parser.rl" + {te = p+1;{ + addLiteral(currentSeq, '\x0a', mode); + }} + break; + case 335: +#line 1472 "Parser.rl" + {te = p+1;{ + addLiteral(currentSeq, '\x0d', mode); + }} + break; + case 336: +#line 1476 "Parser.rl" + {te = p+1;{ + addLiteral(currentSeq, '\x0c', mode); + }} + break; + case 337: +#line 1480 "Parser.rl" + {te = p+1;{ + addLiteral(currentSeq, '\x07', mode); + }} + break; + case 338: +#line 1484 "Parser.rl" + {te = p+1;{ + addLiteral(currentSeq, '\x1b', mode); + }} + break; + case 339: +#line 1488 "Parser.rl" + {te = p+1;{ + addLiteral(currentSeq, octAccumulator, mode); + }} + break; + case 340: +#line 479 "Parser.rl" + {te = p+1;{ + if (accumulator == 0) { + throw LocatedParseError("Numbered reference cannot be zero"); + } + currentSeq->addComponent(std::make_unique(accumulator)); + }} + break; + case 341: +#line 486 "Parser.rl" + {te = p+1;{ + // Accumulator is a negative offset. 
+ if (accumulator == 0) { + throw LocatedParseError("Numbered reference cannot be zero"); + } + if (accumulator >= groupIndex) { + throw LocatedParseError("Invalid reference"); + } + unsigned idx = groupIndex - accumulator; + currentSeq->addComponent(std::make_unique(idx)); + }} + break; + case 342: +#line 479 "Parser.rl" + {te = p+1;{ + if (accumulator == 0) { + throw LocatedParseError("Numbered reference cannot be zero"); + } + currentSeq->addComponent(std::make_unique(accumulator)); + }} + break; + case 343: +#line 486 "Parser.rl" + {te = p+1;{ + // Accumulator is a negative offset. + if (accumulator == 0) { + throw LocatedParseError("Numbered reference cannot be zero"); + } + if (accumulator >= groupIndex) { + throw LocatedParseError("Invalid reference"); + } + unsigned idx = groupIndex - accumulator; + currentSeq->addComponent(std::make_unique(idx)); + }} + break; + case 344: +#line 498 "Parser.rl" + {te = p+1;{ + currentSeq->addComponent(std::make_unique(label)); + }} + break; + case 345: +#line 498 "Parser.rl" + {te = p+1;{ + currentSeq->addComponent(std::make_unique(label)); + }} + break; + case 346: +#line 498 "Parser.rl" + {te = p+1;{ + currentSeq->addComponent(std::make_unique(label)); + }} + break; + case 347: +#line 498 "Parser.rl" + {te = p+1;{ + currentSeq->addComponent(std::make_unique(label)); + }} + break; + case 348: +#line 498 "Parser.rl" + {te = p+1;{ + currentSeq->addComponent(std::make_unique(label)); + }} + break; + case 349: +#line 1549 "Parser.rl" + {te = p+1;{ + ostringstream str; + str << "Onigiruma subroutine call at index " << ts - ptr << + " not supported."; + throw ParseError(str.str()); + }} + break; + case 350: +#line 1560 "Parser.rl" + {te = p+1;{ + string oct(ts + 3, te - ts - 4); + unsigned long val; + try { + val = stoul(oct, nullptr, 8); + } catch (const std::out_of_range &) { + val = MAX_UNICODE + 1; + } + if ((!mode.utf8 && val > 255) || val > MAX_UNICODE) { + throw LocatedParseError("Value in \\o{...} sequence is too 
large"); + } + addEscapedOctal(currentSeq, (unichar)val, mode); + }} + break; + case 351: +#line 1578 "Parser.rl" + {te = p+1;{ + addEscapedHex(currentSeq, accumulator, mode); + }} + break; + case 352: +#line 1582 "Parser.rl" + {te = p+1;{ + string hex(ts + 3, te - ts - 4); + unsigned long val; + try { + val = stoul(hex, nullptr, 16); + } catch (const std::out_of_range &) { + val = MAX_UNICODE + 1; + } + if (val > MAX_UNICODE) { + throw LocatedParseError("Value in \\x{...} sequence is too large"); + } + addEscapedHex(currentSeq, (unichar)val, mode); + }} + break; + case 353: +#line 1600 "Parser.rl" + {te = p+1;{ + if (te - ts < 3) { + assert(te - ts == 2); + throw LocatedParseError(SLASH_C_ERROR); + } else { + assert(te - ts == 3); + addLiteral(currentSeq, decodeCtrl(ts[2]), mode); + } + }} + break; + case 354: +#line 1610 "Parser.rl" + {te = p+1;{ + ostringstream str; + str << "'\\" << *(ts + 1) << "' at index " << ts - ptr + << " not supported."; + throw ParseError(str.str()); + }} + break; + case 355: +#line 1618 "Parser.rl" + {te = p+1;{ + auto cc = generateComponent(CLASS_WORD, false, mode); + currentSeq->addComponent(move(cc)); + }} + break; + case 356: +#line 1623 "Parser.rl" + {te = p+1;{ + auto cc = generateComponent(CLASS_WORD, true, mode); + currentSeq->addComponent(move(cc)); + }} + break; + case 357: +#line 1628 "Parser.rl" + {te = p+1;{ + auto cc = generateComponent(CLASS_SPACE, false, mode); + currentSeq->addComponent(move(cc)); + }} + break; + case 358: +#line 1633 "Parser.rl" + {te = p+1;{ + auto cc = generateComponent(CLASS_SPACE, true, mode); + currentSeq->addComponent(move(cc)); + }} + break; + case 359: +#line 1638 "Parser.rl" + {te = p+1;{ + auto cc = generateComponent(CLASS_DIGIT, false, mode); + currentSeq->addComponent(move(cc)); + }} + break; + case 360: +#line 1643 "Parser.rl" + {te = p+1;{ + auto cc = generateComponent(CLASS_DIGIT, true, mode); + currentSeq->addComponent(move(cc)); + }} + break; + case 361: +#line 1648 "Parser.rl" + {te 
= p+1;{ + auto cc = generateComponent(CLASS_HORZ, false, mode); + currentSeq->addComponent(move(cc)); + }} + break; + case 362: +#line 1653 "Parser.rl" + {te = p+1;{ + auto cc = generateComponent(CLASS_HORZ, true, mode); + currentSeq->addComponent(move(cc)); + }} + break; + case 363: +#line 1658 "Parser.rl" + {te = p+1;{ + auto cc = generateComponent(CLASS_VERT, false, mode); + currentSeq->addComponent(move(cc)); + }} + break; + case 364: +#line 1663 "Parser.rl" + {te = p+1;{ + auto cc = generateComponent(CLASS_VERT, true, mode); + currentSeq->addComponent(move(cc)); + }} + break; + case 365: +#line 1668 "Parser.rl" + {te = p+1;{ + assert(!currentCls && !inCharClass); + currentCls = getComponentClass(mode); + negated = false; + p--; + { + DEBUG_PRINTF("stack %zu top %d\n", stack.size(), top); + if ((int)stack.size() == top) { + stack.resize(2 * (top + 1)); + } + {stack[top++] = cs; cs = 559;goto _again;}} + }} + break; + case 366: +#line 1676 "Parser.rl" + {te = p+1;{ + assert(!currentCls && !inCharClass); + currentCls = getComponentClass(mode); + negated = false; + p--; + { + DEBUG_PRINTF("stack %zu top %d\n", stack.size(), top); + if ((int)stack.size() == top) { + stack.resize(2 * (top + 1)); + } + {stack[top++] = cs; cs = 818;goto _again;}} + }} + break; + case 367: +#line 1684 "Parser.rl" + {te = p+1;{ + assert(!currentCls && !inCharClass); + currentCls = getComponentClass(mode); + negated = true; + p--; + { + DEBUG_PRINTF("stack %zu top %d\n", stack.size(), top); + if ((int)stack.size() == top) { + stack.resize(2 * (top + 1)); + } + {stack[top++] = cs; cs = 559;goto _again;}} + }} + break; + case 368: +#line 1692 "Parser.rl" + {te = p+1;{ + assert(!currentCls && !inCharClass); + currentCls = getComponentClass(mode); + negated = true; + p--; + { + DEBUG_PRINTF("stack %zu top %d\n", stack.size(), top); + if ((int)stack.size() == top) { + stack.resize(2 * (top + 1)); + } + {stack[top++] = cs; cs = 818;goto _again;}} + }} + break; + case 369: +#line 1704 
"Parser.rl" + {te = p+1;{ + ostringstream str; + str << "\\R at index " << ts - ptr << " not supported."; + throw ParseError(str.str()); + }} + break; + case 370: +#line 1711 "Parser.rl" + {te = p+1;{ + ostringstream str; + str << "\\K at index " << ts - ptr << " not supported."; + throw ParseError(str.str()); + }} + break; + case 371: +#line 1726 "Parser.rl" + {te = p+1;{ + ostringstream str; + str << "\\G at index " << ts - ptr << " not supported."; + throw ParseError(str.str()); + }} + break; + case 372: +#line 1732 "Parser.rl" + {te = p+1;{ + currentSeq->addComponent(std::make_unique(ts - ptr, mode)); + }} + break; + case 373: +#line 1737 "Parser.rl" + {te = p+1;{ + addLiteral(currentSeq, *(ts + 1), mode); + }} + break; + case 374: +#line 316 "Parser.rl" + {te = p+1;{ + inComment = true; + {cs = 848;goto _again;} + }} + break; + case 375: +#line 433 "Parser.rl" + {te = p+1;{ + mode = newMode; + currentSeq->addComponent(std::make_unique()); + }} + break; + case 376: +#line 355 "Parser.rl" + {te = p+1;{ + PUSH_SEQUENCE; + mode = newMode; + currentSeq = + enterSequence(currentSeq, std::make_unique()); + }} + break; + case 377: +#line 369 "Parser.rl" + {te = p+1;{ + PUSH_SEQUENCE; + currentSeq = enterSequence(currentSeq, + std::make_unique(ComponentAssertion::LOOKAHEAD, + ComponentAssertion::POS)); + }} + break; + case 378: +#line 375 "Parser.rl" + {te = p+1;{ + PUSH_SEQUENCE; + currentSeq = enterSequence(currentSeq, + std::make_unique(ComponentAssertion::LOOKAHEAD, + ComponentAssertion::NEG)); + }} + break; + case 379: +#line 381 "Parser.rl" + {te = p+1;{ + PUSH_SEQUENCE; + currentSeq = enterSequence(currentSeq, + std::make_unique(ComponentAssertion::LOOKBEHIND, + ComponentAssertion::POS)); + }} + break; + case 380: +#line 387 "Parser.rl" + {te = p+1;{ + PUSH_SEQUENCE; + currentSeq = enterSequence(currentSeq, + std::make_unique(ComponentAssertion::LOOKBEHIND, + ComponentAssertion::NEG)); + }} + break; + case 381: +#line 393 "Parser.rl" + {te = p+1;{ + throw 
LocatedParseError("Embedded code is not supported"); + }} + break; + case 382: +#line 393 "Parser.rl" + {te = p+1;{ + throw LocatedParseError("Embedded code is not supported"); + }} + break; + case 383: +#line 416 "Parser.rl" + {te = p+1;{ + PUSH_SEQUENCE; + currentSeq = enterSequence(currentSeq, + std::make_unique()); + }} + break; + case 384: +#line 336 "Parser.rl" + {te = p+1;{ + assert(!label.empty()); // should be guaranteed by machine + char c = *label.begin(); + if (c >= '0' && c <= '9') { + throw LocatedParseError("Group name cannot begin with a digit"); + } + if (!groupNames.insert(label).second) { + throw LocatedParseError("Two named subpatterns use the name '" + label + "'"); + } + PUSH_SEQUENCE; + auto seq = std::make_unique(); + seq->setCaptureIndex(groupIndex++); + seq->setCaptureName(label); + currentSeq = enterSequence(currentSeq, move(seq)); + }} + break; + case 385: +#line 399 "Parser.rl" + {te = p+1;{ + throw LocatedParseError("Subpattern reference unsupported"); + }} + break; + case 386: +#line 399 "Parser.rl" + {te = p+1;{ + throw LocatedParseError("Subpattern reference unsupported"); + }} + break; + case 387: +#line 1783 "Parser.rl" + {te = p+1;{ + auto a = std::make_unique( + ComponentAssertion::LOOKAHEAD, ComponentAssertion::POS); + ComponentAssertion *a_seq = a.get(); + PUSH_SEQUENCE; + currentSeq = enterSequence(currentSeq, + std::make_unique(move(a))); + PUSH_SEQUENCE; + currentSeq = a_seq; + }} + break; + case 388: +#line 1794 "Parser.rl" + {te = p+1;{ + auto a = std::make_unique( + ComponentAssertion::LOOKAHEAD, ComponentAssertion::NEG); + ComponentAssertion *a_seq = a.get(); + PUSH_SEQUENCE; + currentSeq = enterSequence(currentSeq, + std::make_unique(move(a))); + PUSH_SEQUENCE; + currentSeq = a_seq; + }} + break; + case 389: +#line 1805 "Parser.rl" + {te = p+1;{ + auto a = std::make_unique( + ComponentAssertion::LOOKBEHIND, ComponentAssertion::POS); + ComponentAssertion *a_seq = a.get(); + PUSH_SEQUENCE; + currentSeq = 
enterSequence(currentSeq, + std::make_unique(move(a))); + PUSH_SEQUENCE; + currentSeq = a_seq; + }} + break; + case 390: +#line 1816 "Parser.rl" + {te = p+1;{ + auto a = std::make_unique( + ComponentAssertion::LOOKBEHIND, ComponentAssertion::NEG); + ComponentAssertion *a_seq = a.get(); + PUSH_SEQUENCE; + currentSeq = enterSequence(currentSeq, + std::make_unique(move(a))); + PUSH_SEQUENCE; + currentSeq = a_seq; + }} + break; + case 391: +#line 1828 "Parser.rl" + {te = p+1;{ + throw LocatedParseError("Pattern recursion not supported"); + }} + break; + case 392: +#line 402 "Parser.rl" + {te = p+1;{ + if (accumulator == 0) { + throw LocatedParseError("Numbered reference cannot be zero"); + } + PUSH_SEQUENCE; + currentSeq = enterSequence(currentSeq, + std::make_unique(accumulator)); + }} + break; + case 393: +#line 410 "Parser.rl" + {te = p+1;{ + PUSH_SEQUENCE; + assert(!label.empty()); + currentSeq = enterSequence(currentSeq, + std::make_unique(label)); + }} + break; + case 394: +#line 1844 "Parser.rl" + {te = p+1;{ + ostringstream str; + str << "Callout at index " << ts - ptr << " not supported."; + throw ParseError(str.str()); + }} + break; + case 395: +#line 1852 "Parser.rl" + {te = p+1;{ + throw LocatedParseError("Unrecognised character after (?"); + }} + break; + case 396: +#line 1857 "Parser.rl" + {te = p+1;{ + assert(mode.utf8); + /* leverage ComponentClass to generate the vertices */ + auto cc = getComponentClass(mode); + cc->add(readUtf8CodePoint2c(ts)); + cc->finalize(); + currentSeq->addComponent(move(cc)); + }} + break; + case 397: +#line 1866 "Parser.rl" + {te = p+1;{ + assert(mode.utf8); + /* leverage ComponentClass to generate the vertices */ + auto cc = getComponentClass(mode); + cc->add(readUtf8CodePoint3c(ts)); + cc->finalize(); + currentSeq->addComponent(move(cc)); + }} + break; + case 398: +#line 1875 "Parser.rl" + {te = p+1;{ + assert(mode.utf8); + /* leverage ComponentClass to generate the vertices */ + auto cc = getComponentClass(mode); + 
cc->add(readUtf8CodePoint4c(ts)); + cc->finalize(); + currentSeq->addComponent(move(cc)); + }} + break; + case 399: +#line 1884 "Parser.rl" + {te = p+1;{ + assert(mode.utf8); + throwInvalidUtf8(); + }} + break; + case 400: +#line 1893 "Parser.rl" + {te = p+1;{ + if (mode.ignore_space == false) { + addLiteral(currentSeq, *ts, mode); + } + }} + break; + case 401: +#line 1898 "Parser.rl" + {te = p+1;{ + addLiteral(currentSeq, *ts, mode); + }} + break; + case 402: +#line 328 "Parser.rl" + {te = p;p--;{ + PUSH_SEQUENCE; + auto seq = std::make_unique(); + seq->setCaptureIndex(groupIndex++); + currentSeq = enterSequence(currentSeq, move(seq)); + }} + break; + case 403: +#line 421 "Parser.rl" + {te = p;p--;{ + assert(!currentCls); + assert(!inCharClass); // not reentrant + currentCls = getComponentClass(mode); + inCharClass = true; + inCharClassEarly = true; + currentClsBegin = ts; + {cs = 836;goto _again;} + }} + break; + case 404: +#line 1310 "Parser.rl" + {te = p;p--;{ + if (!currentSeq->addRepeat(0, ComponentRepeat::NoLimit, + ComponentRepeat::REPEAT_GREEDY)) { + throwInvalidRepeat(); + } + }} + break; + case 405: +#line 1331 "Parser.rl" + {te = p;p--;{ + if (!currentSeq->addRepeat(1, ComponentRepeat::NoLimit, + ComponentRepeat::REPEAT_GREEDY)) { + throwInvalidRepeat(); + } + }} + break; + case 406: +#line 1352 "Parser.rl" + {te = p;p--;{ + if (!currentSeq->addRepeat( + 0, 1, ComponentRepeat::REPEAT_GREEDY)) { + throwInvalidRepeat(); + } + }} + break; + case 407: +#line 1373 "Parser.rl" + {te = p;p--;{ + if (repeatN > repeatM || repeatM == 0) { + throwInvalidRepeat(); + } else if (!currentSeq->addRepeat( + repeatN, repeatM, + ComponentRepeat::REPEAT_GREEDY)) { + throwInvalidRepeat(); + } + }} + break; + case 408: +#line 1488 "Parser.rl" + {te = p;p--;{ + addLiteral(currentSeq, octAccumulator, mode); + }} + break; + case 409: +#line 1491 "Parser.rl" + {te = p;p--;{ + // If there are enough capturing sub expressions, this may be + // a back reference + accumulator = 
parseAsDecimal(octAccumulator); + if (accumulator < groupIndex) { + currentSeq->addComponent(std::make_unique(accumulator)); + } else { + addEscapedOctal(currentSeq, octAccumulator, mode); + } + }} + break; + case 410: +#line 479 "Parser.rl" + {te = p;p--;{ + if (accumulator == 0) { + throw LocatedParseError("Numbered reference cannot be zero"); + } + currentSeq->addComponent(std::make_unique(accumulator)); + }} + break; + case 411: +#line 479 "Parser.rl" + {te = p;p--;{ + if (accumulator == 0) { + throw LocatedParseError("Numbered reference cannot be zero"); + } + currentSeq->addComponent(std::make_unique(accumulator)); + }} + break; + case 412: +#line 486 "Parser.rl" + {te = p;p--;{ + // Accumulator is a negative offset. + if (accumulator == 0) { + throw LocatedParseError("Numbered reference cannot be zero"); + } + if (accumulator >= groupIndex) { + throw LocatedParseError("Invalid reference"); + } + unsigned idx = groupIndex - accumulator; + currentSeq->addComponent(std::make_unique(idx)); + }} + break; + case 413: +#line 1557 "Parser.rl" + {te = p;p--;{ + throw LocatedParseError("Invalid reference after \\g"); + }} + break; + case 414: +#line 1574 "Parser.rl" + {te = p;p--;{ + throw LocatedParseError("Value in \\o{...} sequence is non-octal or missing braces"); + }} + break; + case 415: +#line 1578 "Parser.rl" + {te = p;p--;{ + addEscapedHex(currentSeq, accumulator, mode); + }} + break; + case 416: +#line 1596 "Parser.rl" + {te = p;p--;{ + throw LocatedParseError("Value in \\x{...} sequence is non-hex or missing }"); + }} + break; + case 417: +#line 1600 "Parser.rl" + {te = p;p--;{ + if (te - ts < 3) { + assert(te - ts == 2); + throw LocatedParseError(SLASH_C_ERROR); + } else { + assert(te - ts == 3); + addLiteral(currentSeq, decodeCtrl(ts[2]), mode); + } + }} + break; + case 418: +#line 1700 "Parser.rl" + {te = p;p--;{ throw LocatedParseError("Malformed property"); }} + break; + case 419: +#line 1701 "Parser.rl" + {te = p;p--;{ throw 
LocatedParseError("Malformed property"); }} + break; + case 420: +#line 1719 "Parser.rl" + {te = p;p--;{ + ostringstream str; + str << "\\k at index " << ts - ptr << " not supported."; + throw ParseError(str.str()); + }} + break; + case 421: +#line 1742 "Parser.rl" + {te = p;p--;{ + assert(ts + 1 == pe); + ostringstream str; + str << "Unescaped \\ at end of input, index " << ts - ptr << "."; + throw ParseError(str.str()); + }} + break; + case 422: +#line 396 "Parser.rl" + {te = p;p--;{ + throw LocatedParseError("Conditional subpattern unsupported"); + }} + break; + case 423: +#line 1852 "Parser.rl" + {te = p;p--;{ + throw LocatedParseError("Unrecognised character after (?"); + }} + break; + case 424: +#line 1884 "Parser.rl" + {te = p;p--;{ + assert(mode.utf8); + throwInvalidUtf8(); + }} + break; + case 425: +#line 1898 "Parser.rl" + {te = p;p--;{ + addLiteral(currentSeq, *ts, mode); + }} + break; + case 426: +#line 328 "Parser.rl" + {{p = ((te))-1;}{ + PUSH_SEQUENCE; + auto seq = std::make_unique(); + seq->setCaptureIndex(groupIndex++); + currentSeq = enterSequence(currentSeq, move(seq)); + }} + break; + case 427: +#line 421 "Parser.rl" + {{p = ((te))-1;}{ + assert(!currentCls); + assert(!inCharClass); // not reentrant + currentCls = getComponentClass(mode); + inCharClass = true; + inCharClassEarly = true; + currentClsBegin = ts; + {cs = 836;goto _again;} + }} + break; + case 428: +#line 1557 "Parser.rl" + {{p = ((te))-1;}{ + throw LocatedParseError("Invalid reference after \\g"); + }} + break; + case 429: +#line 1574 "Parser.rl" + {{p = ((te))-1;}{ + throw LocatedParseError("Value in \\o{...} sequence is non-octal or missing braces"); + }} + break; + case 430: +#line 1596 "Parser.rl" + {{p = ((te))-1;}{ + throw LocatedParseError("Value in \\x{...} sequence is non-hex or missing }"); + }} + break; + case 431: +#line 1719 "Parser.rl" + {{p = ((te))-1;}{ + ostringstream str; + str << "\\k at index " << ts - ptr << " not supported."; + throw ParseError(str.str()); + 
}} + break; + case 432: +#line 396 "Parser.rl" + {{p = ((te))-1;}{ + throw LocatedParseError("Conditional subpattern unsupported"); + }} + break; + case 433: +#line 1852 "Parser.rl" + {{p = ((te))-1;}{ + throw LocatedParseError("Unrecognised character after (?"); + }} + break; + case 434: +#line 1884 "Parser.rl" + {{p = ((te))-1;}{ + assert(mode.utf8); + throwInvalidUtf8(); + }} + break; + case 435: +#line 1898 "Parser.rl" + {{p = ((te))-1;}{ + addLiteral(currentSeq, *ts, mode); + }} + break; + case 436: +#line 1 "NONE" + { switch( act ) { + case 288: + {{p = ((te))-1;} + // If there are enough capturing sub expressions, this may be + // a back reference + accumulator = parseAsDecimal(octAccumulator); + if (accumulator < groupIndex) { + currentSeq->addComponent(std::make_unique(accumulator)); + } else { + addEscapedOctal(currentSeq, octAccumulator, mode); + } + } + break; + case 290: + {{p = ((te))-1;} + // if there are enough left parens to this point, back ref + if (accumulator < groupIndex) { + currentSeq->addComponent(std::make_unique(accumulator)); + } else { + // Otherwise, we interpret the first three digits as an + // octal escape, and the remaining characters stand for + // themselves as literals. + const char *s = ts; + unsigned int accum = 0; + unsigned int oct_digits = 0; + assert(*s == '\\'); // token starts at backslash + for (++s; s < te && oct_digits < 3; ++oct_digits, ++s) { + u8 digit = *s - '0'; + if (digit < 8) { + accum = digit + accum * 8; + } else { + break; + } + } + + if (oct_digits > 0) { + addEscapedOctal(currentSeq, accum, mode); + } + + // And then the rest of the digits, if any, are literal. 
+ for (; s < te; ++s) { + addLiteral(currentSeq, *s, mode); + } + } + } + break; + case 330: + {{p = ((te))-1;} + addLiteral(currentSeq, *(ts + 1), mode); + } + break; + } + } + break; +#line 5508 "Parser.cpp" + } + } + +_again: + _acts = _regex_actions + _regex_to_state_actions[cs]; + _nacts = (unsigned int) *_acts++; + while ( _nacts-- > 0 ) { + switch ( *_acts++ ) { + case 23: +#line 1 "NONE" + {ts = 0;} + break; +#line 5521 "Parser.cpp" + } + } + + if ( cs == 0 ) + goto _out; + if ( ++p != pe ) + goto _resume; + _test_eof: {} + if ( p == eof ) + { + if ( _regex_eof_trans[cs] > 0 ) { + _trans = _regex_eof_trans[cs] - 1; + goto _eof_trans; + } + const short *__acts = _regex_actions + _regex_eof_actions[cs]; + unsigned int __nacts = (unsigned int) *__acts++; + while ( __nacts-- > 0 ) { + switch ( *__acts++ ) { + case 22: +#line 730 "Parser.rl" + { throw LocatedParseError("Malformed property"); } + break; +#line 5544 "Parser.cpp" + } + } + } + + _out: {} + } + +#line 1984 "Parser.rl" + + if (p != pe && *p != '\0') { + // didn't make it to the end of our input, but we didn't throw a ParseError? + assert(0); + ostringstream str; + str << "Parse error at index " << (p - ptr) << "."; + throw ParseError(str.str()); + } + + if (currentCls) { + assert(inCharClass); + assert(currentClsBegin); + ostringstream oss; + oss << "Unterminated character class starting at index " + << currentClsBegin - ptr << "."; + throw ParseError(oss.str()); + } + + if (inComment) { + throw ParseError("Unterminated comment."); + } + + if (!sequences.empty()) { + ostringstream str; + str << "Missing close parenthesis for group started at index " + << sequences.back().seqOffset << "."; + throw ParseError(str.str()); + } + + // Unlikely, but possible + if (groupIndex > 65535) { + throw ParseError("The maximum number of capturing subexpressions is 65535."); + } + + // Finalize the top-level sequence, which will take care of any + // top-level alternation. 
+ currentSeq->finalize(); + assert(currentSeq == rootSeq.get()); + + // Ensure that all references are valid. + checkReferences(*rootSeq, groupIndex, groupNames); + + return move(rootSeq); + } catch (LocatedParseError &error) { + if (ts >= ptr && ts <= pe) { + error.locate(ts - ptr); + } else { + error.locate(0); + } + throw; + } +} + +} // namespace ue2 diff --git a/contrib/vectorscan-cmake/rageled_files/control_verbs.cpp b/contrib/vectorscan-cmake/rageled_files/control_verbs.cpp new file mode 100644 index 00000000000..19b5c6955e1 --- /dev/null +++ b/contrib/vectorscan-cmake/rageled_files/control_verbs.cpp @@ -0,0 +1,443 @@ + +#line 1 "control_verbs.rl" +/* + * Copyright (c) 2017, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * \file + * \brief Parser for control verbs that can occur at the beginning of a pattern. + */ + +#include "parser/control_verbs.h" + +#include "parser/Parser.h" +#include "parser/parse_error.h" + +#include +#include + +using namespace std; + +namespace ue2 { + +const char *read_control_verbs(const char *ptr, const char *end, size_t start, + ParseMode &mode) { + const char *p = ptr; + const char *pe = end; + const char *eof = pe; + const char *ts, *te; + int cs; + UNUSED int act; + + +#line 59 "control_verbs.cpp" +static const char _ControlVerbs_actions[] = { + 0, 1, 0, 1, 1, 1, 2, 1, + 3, 1, 4, 1, 5, 1, 6, 1, + 7, 1, 8, 1, 9 +}; + +static const unsigned char _ControlVerbs_key_offsets[] = { + 0, 7, 8, 10, 12, 14, 16, 18, + 20, 21, 23, 25, 27, 30, 32, 34, + 36, 38, 40, 42, 44, 46, 48, 50, + 52, 55, 57, 59, 61, 63, 66, 68, + 70, 72, 74, 76, 79, 82, 84, 86, + 88, 90, 92, 94, 96, 98, 100, 102, + 105, 107, 109, 111, 113, 115, 117, 119, + 121, 123, 125, 127, 129, 131, 133, 135, + 137, 139, 141, 143, 146, 148, 149, 151, + 155, 157, 159, 160, 161 +}; + +static const char _ControlVerbs_trans_keys[] = { + 41, 65, 66, 67, 76, 78, 85, 41, + 41, 78, 41, 89, 41, 67, 41, 82, + 41, 76, 41, 70, 41, 41, 83, 41, + 82, 41, 95, 41, 65, 85, 41, 78, + 41, 89, 41, 67, 41, 78, 41, 73, + 41, 67, 41, 79, 41, 68, 41, 69, + 41, 82, 41, 76, 41, 70, 73, 41, + 77, 41, 73, 41, 84, 41, 95, 41, + 77, 82, 41, 65, 41, 84, 41, 67, + 41, 72, 41, 61, 41, 48, 
57, 41, + 48, 57, 41, 69, 41, 67, 41, 85, + 41, 82, 41, 83, 41, 73, 41, 79, + 41, 78, 41, 79, 41, 95, 41, 65, + 83, 41, 85, 41, 84, 41, 79, 41, + 95, 41, 80, 41, 79, 41, 83, 41, + 83, 41, 69, 41, 83, 41, 83, 41, + 84, 41, 65, 41, 82, 41, 84, 41, + 95, 41, 79, 41, 80, 41, 84, 41, + 67, 84, 41, 80, 41, 41, 70, 41, + 49, 51, 56, 41, 54, 41, 50, 41, + 40, 42, 0 +}; + +static const char _ControlVerbs_single_lengths[] = { + 7, 1, 2, 2, 2, 2, 2, 2, + 1, 2, 2, 2, 3, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, + 3, 2, 2, 2, 2, 3, 2, 2, + 2, 2, 2, 1, 1, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 3, + 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 3, 2, 1, 2, 4, + 2, 2, 1, 1, 1 +}; + +static const char _ControlVerbs_range_lengths[] = { + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 1, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0 +}; + +static const short _ControlVerbs_index_offsets[] = { + 0, 8, 10, 13, 16, 19, 22, 25, + 28, 30, 33, 36, 39, 43, 46, 49, + 52, 55, 58, 61, 64, 67, 70, 73, + 76, 80, 83, 86, 89, 92, 96, 99, + 102, 105, 108, 111, 114, 117, 120, 123, + 126, 129, 132, 135, 138, 141, 144, 147, + 151, 154, 157, 160, 163, 166, 169, 172, + 175, 178, 181, 184, 187, 190, 193, 196, + 199, 202, 205, 208, 212, 215, 217, 220, + 225, 228, 231, 233, 235 +}; + +static const char _ControlVerbs_indicies[] = { + 0, 2, 3, 4, 5, 6, 7, 1, + 8, 1, 8, 9, 1, 8, 10, 1, + 11, 12, 1, 8, 13, 1, 8, 14, + 1, 8, 15, 1, 11, 1, 8, 16, + 1, 8, 17, 1, 8, 18, 1, 8, + 19, 20, 1, 8, 21, 1, 8, 22, + 1, 8, 12, 1, 8, 23, 1, 8, + 24, 1, 8, 25, 1, 8, 26, 1, + 8, 27, 1, 8, 15, 1, 8, 28, + 1, 11, 14, 1, 8, 15, 29, 1, + 8, 30, 1, 8, 31, 1, 8, 32, + 1, 8, 33, 1, 8, 34, 35, 1, + 8, 36, 1, 8, 37, 1, 8, 38, + 1, 8, 39, 1, 8, 40, 1, 8, + 41, 1, 11, 41, 1, 8, 42, 1, + 8, 43, 1, 8, 44, 1, 8, 45, + 1, 8, 46, 1, 8, 47, 1, 8, + 48, 1, 8, 39, 1, 8, 49, 1, + 8, 50, 1, 8, 51, 
52, 1, 8, + 53, 1, 8, 54, 1, 8, 55, 1, + 8, 56, 1, 8, 57, 1, 8, 58, + 1, 8, 59, 1, 8, 60, 1, 8, + 61, 1, 8, 62, 1, 8, 15, 1, + 8, 63, 1, 8, 64, 1, 8, 65, + 1, 8, 66, 1, 8, 67, 1, 8, + 68, 1, 8, 69, 1, 8, 15, 1, + 8, 70, 71, 1, 8, 72, 1, 73, + 1, 8, 74, 1, 75, 76, 77, 78, + 1, 8, 15, 1, 8, 15, 1, 75, + 1, 80, 79, 82, 81, 0 +}; + +static const char _ControlVerbs_trans_targs[] = { + 75, 1, 2, 9, 22, 24, 45, 67, + 75, 3, 4, 75, 5, 6, 7, 8, + 10, 11, 12, 13, 16, 14, 15, 17, + 18, 19, 20, 21, 23, 25, 26, 27, + 28, 29, 30, 37, 31, 32, 33, 34, + 35, 36, 38, 39, 40, 41, 42, 43, + 44, 46, 47, 48, 59, 49, 50, 51, + 52, 53, 54, 55, 56, 57, 58, 60, + 61, 62, 63, 64, 65, 66, 68, 70, + 69, 75, 71, 75, 72, 73, 74, 75, + 76, 75, 0 +}; + +static const char _ControlVerbs_trans_actions[] = { + 19, 0, 0, 0, 0, 0, 0, 0, + 13, 0, 0, 11, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 9, 0, 7, 0, 0, 0, 15, + 5, 17, 0 +}; + +static const char _ControlVerbs_to_state_actions[] = { + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 1, 0 +}; + +static const char _ControlVerbs_from_state_actions[] = { + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 3, 0 +}; + +static const short _ControlVerbs_eof_trans[] = { + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 0, 82 +}; + 
+static const int ControlVerbs_start = 75; +static const int ControlVerbs_first_final = 75; +static const int ControlVerbs_error = -1; + +static const int ControlVerbs_en_main = 75; + + +#line 249 "control_verbs.cpp" + { + cs = ControlVerbs_start; + ts = 0; + te = 0; + act = 0; + } + +#line 105 "control_verbs.rl" + + + try { + +#line 262 "control_verbs.cpp" + { + int _klen; + unsigned int _trans; + const char *_acts; + unsigned int _nacts; + const char *_keys; + + if ( p == pe ) + goto _test_eof; +_resume: + _acts = _ControlVerbs_actions + _ControlVerbs_from_state_actions[cs]; + _nacts = (unsigned int) *_acts++; + while ( _nacts-- > 0 ) { + switch ( *_acts++ ) { + case 1: +#line 1 "NONE" + {ts = p;} + break; +#line 281 "control_verbs.cpp" + } + } + + _keys = _ControlVerbs_trans_keys + _ControlVerbs_key_offsets[cs]; + _trans = _ControlVerbs_index_offsets[cs]; + + _klen = _ControlVerbs_single_lengths[cs]; + if ( _klen > 0 ) { + const char *_lower = _keys; + const char *_mid; + const char *_upper = _keys + _klen - 1; + while (1) { + if ( _upper < _lower ) + break; + + _mid = _lower + ((_upper-_lower) >> 1); + if ( (*p) < *_mid ) + _upper = _mid - 1; + else if ( (*p) > *_mid ) + _lower = _mid + 1; + else { + _trans += (unsigned int)(_mid - _keys); + goto _match; + } + } + _keys += _klen; + _trans += _klen; + } + + _klen = _ControlVerbs_range_lengths[cs]; + if ( _klen > 0 ) { + const char *_lower = _keys; + const char *_mid; + const char *_upper = _keys + (_klen<<1) - 2; + while (1) { + if ( _upper < _lower ) + break; + + _mid = _lower + (((_upper-_lower) >> 1) & ~1); + if ( (*p) < _mid[0] ) + _upper = _mid - 2; + else if ( (*p) > _mid[1] ) + _lower = _mid + 2; + else { + _trans += (unsigned int)((_mid - _keys)>>1); + goto _match; + } + } + _trans += _klen; + } + +_match: + _trans = _ControlVerbs_indicies[_trans]; +_eof_trans: + cs = _ControlVerbs_trans_targs[_trans]; + + if ( _ControlVerbs_trans_actions[_trans] == 0 ) + goto _again; + + _acts = _ControlVerbs_actions + 
_ControlVerbs_trans_actions[_trans]; + _nacts = (unsigned int) *_acts++; + while ( _nacts-- > 0 ) + { + switch ( *_acts++ ) + { + case 2: +#line 1 "NONE" + {te = p+1;} + break; + case 3: +#line 76 "control_verbs.rl" + {te = p+1;{ + mode.utf8 = true; + }} + break; + case 4: +#line 80 "control_verbs.rl" + {te = p+1;{ + mode.ucp = true; + }} + break; + case 5: +#line 84 "control_verbs.rl" + {te = p+1;{ + ostringstream str; + str << "Unsupported control verb " << string(ts, te - ts); + throw LocatedParseError(str.str()); + }} + break; + case 6: +#line 90 "control_verbs.rl" + {te = p+1;{ + ostringstream str; + str << "Unknown control verb " << string(ts, te - ts); + throw LocatedParseError(str.str()); + }} + break; + case 7: +#line 97 "control_verbs.rl" + {te = p+1;{ + p--; + {p++; goto _out; } + }} + break; + case 8: +#line 97 "control_verbs.rl" + {te = p;p--;{ + p--; + {p++; goto _out; } + }} + break; + case 9: +#line 97 "control_verbs.rl" + {{p = ((te))-1;}{ + p--; + {p++; goto _out; } + }} + break; +#line 400 "control_verbs.cpp" + } + } + +_again: + _acts = _ControlVerbs_actions + _ControlVerbs_to_state_actions[cs]; + _nacts = (unsigned int) *_acts++; + while ( _nacts-- > 0 ) { + switch ( *_acts++ ) { + case 0: +#line 1 "NONE" + {ts = 0;} + break; +#line 413 "control_verbs.cpp" + } + } + + if ( ++p != pe ) + goto _resume; + _test_eof: {} + if ( p == eof ) + { + if ( _ControlVerbs_eof_trans[cs] > 0 ) { + _trans = _ControlVerbs_eof_trans[cs] - 1; + goto _eof_trans; + } + } + + _out: {} + } + +#line 109 "control_verbs.rl" + } catch (LocatedParseError &error) { + if (ts >= ptr && ts <= pe) { + error.locate(ts - ptr + start); + } else { + error.locate(0); + } + throw; + } + + return p; +} + +} // namespace ue2 diff --git a/contrib/hyperscan-cmake/x86_64/config.h b/contrib/vectorscan-cmake/x86_64/config.h similarity index 73% rename from contrib/hyperscan-cmake/x86_64/config.h rename to contrib/vectorscan-cmake/x86_64/config.h index 4786e3f4e21..eab2f3eb079 100644 --- 
a/contrib/hyperscan-cmake/x86_64/config.h +++ b/contrib/vectorscan-cmake/x86_64/config.h @@ -15,15 +15,42 @@ /* "Define if building for EM64T" */ #define ARCH_X86_64 +/* "Define if building for ARM32" */ +/* #undef ARCH_ARM32 */ + +/* "Define if building for AARCH64" */ +/* #undef ARCH_AARCH64 */ + +/* "Define if building for PPC64EL" */ +/* #undef ARCH_PPC64EL */ + +/* "Define if cross compiling for AARCH64" */ +/* #undef CROSS_COMPILE_AARCH64 */ + +/* Define if building SVE for AARCH64. */ +/* #undef BUILD_SVE */ + +/* Define if building SVE2 for AARCH64. */ +/* #undef BUILD_SVE2 */ + +/* Define if building SVE2+BITPERM for AARCH64. */ +/* #undef BUILD_SVE2_BITPERM */ + /* internal build, switch on dump support. */ /* #undef DUMP_SUPPORT */ /* Define if building "fat" runtime. */ /* #undef FAT_RUNTIME */ +/* Define if building AVX2 in the fat runtime. */ +/* #undef BUILD_AVX2 */ + /* Define if building AVX-512 in the fat runtime. */ /* #undef BUILD_AVX512 */ +/* Define if building AVX512VBMI in the fat runtime. */ +/* #undef BUILD_AVX512VBMI */ + /* Define to 1 if `backtrace' works. */ #define HAVE_BACKTRACE @@ -45,6 +72,15 @@ /* C compiler has intrin.h */ /* #undef HAVE_C_INTRIN_H */ +/* C compiler has arm_neon.h */ +/* #undef HAVE_C_ARM_NEON_H */ + +/* C compiler has arm_sve.h */ +/* #undef HAVE_C_ARM_SVE_H */ + +/* C compiler has arm_neon.h */ +/* #undef HAVE_C_PPC64EL_ALTIVEC_H */ + /* Define to 1 if you have the declaration of `pthread_setaffinity_np', and to 0 if you don't. 
*/ /* #undef HAVE_DECL_PTHREAD_SETAFFINITY_NP */ @@ -85,7 +121,7 @@ /* #undef HAVE__ALIGNED_MALLOC */ /* Define if compiler has __builtin_constant_p */ -#define HAVE__BUILTIN_CONSTANT_P +/* #undef HAVE__BUILTIN_CONSTANT_P */ /* Optimize, inline critical functions */ #define HS_OPTIMIZE diff --git a/docs/en/development/contrib.md b/docs/en/development/contrib.md index 3936b613bcb..1fbbff7dcca 100644 --- a/docs/en/development/contrib.md +++ b/docs/en/development/contrib.md @@ -40,7 +40,7 @@ The list of third-party libraries: | googletest | [BSD 3-clause](https://github.com/google/googletest/blob/e7e591764baba0a0c3c9ad0014430e7a27331d16/LICENSE) | | grpc | [Apache](https://github.com/ClickHouse-Extras/grpc/blob/60c986e15cae70aade721d26badabab1f822fdd6/LICENSE) | | h3 | [Apache](https://github.com/ClickHouse-Extras/h3/blob/c7f46cfd71fb60e2fefc90e28abe81657deff735/LICENSE) | -| hyperscan | [Boost](https://github.com/ClickHouse-Extras/hyperscan/blob/e9f08df0213fc637aac0a5bbde9beeaeba2fe9fa/LICENSE) | +| vectorscan | [Boost](https://github.com/ClickHouse-Extras/hyperscan/blob/73695e419c27af7fe2a099c7aa57931cc02aea5d/LICENSE) | | icu | [Public Domain](https://github.com/unicode-org/icu/blob/a56dde820dc35665a66f2e9ee8ba58e75049b668/icu4c/LICENSE) | | icudata | [Public Domain](https://github.com/ClickHouse-Extras/icudata/blob/72d9a4a7febc904e2b0a534ccb25ae40fac5f1e5/LICENSE) | | jemalloc | [BSD 2-clause](https://github.com/ClickHouse-Extras/jemalloc/blob/e6891d9746143bf2cf617493d880ba5a0b9a3efd/COPYING) | diff --git a/src/Functions/CMakeLists.txt b/src/Functions/CMakeLists.txt index 60386908f01..a37891377f4 100644 --- a/src/Functions/CMakeLists.txt +++ b/src/Functions/CMakeLists.txt @@ -86,8 +86,8 @@ if (TARGET ch_contrib::h3) target_link_libraries (clickhouse_functions PRIVATE ch_contrib::h3) endif() -if (TARGET ch_contrib::hyperscan) - target_link_libraries(clickhouse_functions PRIVATE ch_contrib::hyperscan) +if (TARGET ch_contrib::vectorscan) + 
target_link_libraries(clickhouse_functions PRIVATE ch_contrib::vectorscan) endif() if (TARGET ch_contrib::simdjson) diff --git a/src/Functions/MultiMatchAllIndicesImpl.h b/src/Functions/MultiMatchAllIndicesImpl.h index adf9e9b585f..80a71548deb 100644 --- a/src/Functions/MultiMatchAllIndicesImpl.h +++ b/src/Functions/MultiMatchAllIndicesImpl.h @@ -9,7 +9,7 @@ #include "config_functions.h" #include -#if USE_HYPERSCAN +#if USE_VECTORSCAN # include #endif @@ -60,7 +60,7 @@ struct MultiMatchAllIndicesImpl [[maybe_unused]] std::optional edit_distance) { offsets.resize(haystack_offsets.size()); -#if USE_HYPERSCAN +#if USE_VECTORSCAN const auto & hyperscan_regex = MultiRegexps::get(needles, edit_distance); hs_scratch_t * scratch = nullptr; hs_error_t err = hs_clone_scratch(hyperscan_regex->getScratch(), &scratch); @@ -97,7 +97,7 @@ struct MultiMatchAllIndicesImpl on_match, &res); if (err != HS_SUCCESS) - throw Exception("Failed to scan with hyperscan", ErrorCodes::HYPERSCAN_CANNOT_SCAN_TEXT); + throw Exception("Failed to scan with vectorscan", ErrorCodes::HYPERSCAN_CANNOT_SCAN_TEXT); offsets[i] = res.size(); offset = haystack_offsets[i]; } @@ -108,9 +108,9 @@ struct MultiMatchAllIndicesImpl (void)res; (void)offsets; throw Exception( - "multi-search all indices is not implemented when hyperscan is off (is it x86 processor?)", + "multi-search all indices is not implemented when vectorscan is off", ErrorCodes::NOT_IMPLEMENTED); -#endif // USE_HYPERSCAN +#endif // USE_VECTORSCAN } }; diff --git a/src/Functions/MultiMatchAnyImpl.h b/src/Functions/MultiMatchAnyImpl.h index 8a65c8cb2b4..fbbefe7be1d 100644 --- a/src/Functions/MultiMatchAnyImpl.h +++ b/src/Functions/MultiMatchAnyImpl.h @@ -8,7 +8,7 @@ #include "config_functions.h" #include -#if USE_HYPERSCAN +#if USE_VECTORSCAN # include #else # include "MatchImpl.h" @@ -64,13 +64,13 @@ struct MultiMatchAnyImpl (void)FindAny; (void)FindAnyIndex; res.resize(haystack_offsets.size()); -#if USE_HYPERSCAN +#if USE_VECTORSCAN const auto 
& hyperscan_regex = MultiRegexps::get(needles, edit_distance); hs_scratch_t * scratch = nullptr; hs_error_t err = hs_clone_scratch(hyperscan_regex->getScratch(), &scratch); if (err != HS_SUCCESS) - throw Exception("Could not clone scratch space for hyperscan", ErrorCodes::CANNOT_ALLOCATE_MEMORY); + throw Exception("Could not clone scratch space for vectorscan", ErrorCodes::CANNOT_ALLOCATE_MEMORY); MultiRegexps::ScratchPtr smart_scratch(scratch); @@ -92,7 +92,7 @@ struct MultiMatchAnyImpl for (size_t i = 0; i < haystack_offsets_size; ++i) { UInt64 length = haystack_offsets[i] - offset - 1; - /// Hyperscan restriction. + /// Vectorscan restriction. if (length > std::numeric_limits::max()) throw Exception("Too long string to search", ErrorCodes::TOO_MANY_BYTES); /// Zero the result, scan, check, update the offset. @@ -106,14 +106,14 @@ struct MultiMatchAnyImpl on_match, &res[i]); if (err != HS_SUCCESS && err != HS_SCAN_TERMINATED) - throw Exception("Failed to scan with hyperscan", ErrorCodes::HYPERSCAN_CANNOT_SCAN_TEXT); + throw Exception("Failed to scan with vectorscan", ErrorCodes::HYPERSCAN_CANNOT_SCAN_TEXT); offset = haystack_offsets[i]; } #else - /// Fallback if do not use hyperscan + /// Fallback if do not use vectorscan if constexpr (MultiSearchDistance) throw Exception( - "Edit distance multi-search is not implemented when hyperscan is off (is it x86 processor?)", + "Edit distance multi-search is not implemented when vectorscan is off", ErrorCodes::NOT_IMPLEMENTED); PaddedPODArray accum(res.size()); memset(res.data(), 0, res.size() * sizeof(res.front())); @@ -129,7 +129,7 @@ struct MultiMatchAnyImpl res[i] = j + 1; } } -#endif // USE_HYPERSCAN +#endif // USE_VECTORSCAN } }; diff --git a/src/Functions/Regexps.h b/src/Functions/Regexps.h index 952e27b29bc..ac37875f91e 100644 --- a/src/Functions/Regexps.h +++ b/src/Functions/Regexps.h @@ -16,7 +16,7 @@ #include "config_functions.h" -#if USE_HYPERSCAN +#if USE_VECTORSCAN # include #endif @@ -103,7 +103,7 @@ 
private: } -#if USE_HYPERSCAN +#if USE_VECTORSCAN namespace MultiRegexps { @@ -312,6 +312,6 @@ inline Regexps * get(const std::vector & patterns, std::optional Date: Fri, 24 Jun 2022 13:19:29 +0200 Subject: [PATCH 057/101] Simplify method signature --- .../MergeTree/MergePlainMergeTreeTask.cpp | 6 +- src/Storages/MergeTree/MergeTreeData.cpp | 77 ++++++------------- src/Storages/MergeTree/MergeTreeData.h | 29 ++++--- .../MergeTree/MergeTreeDataMergerMutator.cpp | 2 +- src/Storages/MergeTree/MergeTreeSink.cpp | 4 +- .../MergeTree/MutateFromLogEntryTask.cpp | 2 +- .../MergeTree/MutatePlainMergeTreeTask.cpp | 6 +- .../MergeTree/ReplicatedMergeTreeSink.cpp | 2 +- src/Storages/StorageMergeTree.cpp | 15 +++- src/Storages/StorageReplicatedMergeTree.cpp | 12 +-- 10 files changed, 74 insertions(+), 81 deletions(-) diff --git a/src/Storages/MergeTree/MergePlainMergeTreeTask.cpp b/src/Storages/MergeTree/MergePlainMergeTreeTask.cpp index c6a719fbc67..90f04eee019 100644 --- a/src/Storages/MergeTree/MergePlainMergeTreeTask.cpp +++ b/src/Storages/MergeTree/MergePlainMergeTreeTask.cpp @@ -115,7 +115,11 @@ void MergePlainMergeTreeTask::prepare() void MergePlainMergeTreeTask::finish() { new_part = merge_task->getFuture().get(); - storage.merger_mutator.renameMergedTemporaryPart(new_part, future_part->parts, txn, nullptr); + + MergeTreeData::Transaction transaction(storage, txn.get()); + storage.merger_mutator.renameMergedTemporaryPart(new_part, future_part->parts, txn, &transaction); + transaction.commit(); + write_part_log({}); storage.incrementMergedPartsProfileEvent(new_part->getType()); } diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index c24636a56f8..98dc099140a 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -2787,19 +2787,19 @@ MergeTreeData::DataPartsVector MergeTreeData::getActivePartsToReplace( bool MergeTreeData::renameTempPartAndAdd( MutableDataPartPtr & part, 
MergeTreeTransaction * txn, + Transaction & out_transaction, SimpleIncrement * increment, - Transaction * out_transaction, MergeTreeDeduplicationLog * deduplication_log, std::string_view deduplication_token) { - if (out_transaction && &out_transaction->data != this) + if (&out_transaction.data != this) throw Exception("MergeTreeData::Transaction for one table cannot be used with another. It is a bug.", ErrorCodes::LOGICAL_ERROR); DataPartsVector covered_parts; { auto lock = lockParts(); - if (!renameTempPartAndReplace(part, txn, increment, out_transaction, lock, &covered_parts, deduplication_log, deduplication_token)) + if (!renameTempPartAndReplaceImpl(part, txn, increment, out_transaction, lock, &covered_parts, deduplication_log, deduplication_token)) return false; } if (!covered_parts.empty()) @@ -2810,17 +2810,17 @@ bool MergeTreeData::renameTempPartAndAdd( } -bool MergeTreeData::renameTempPartAndReplace( +bool MergeTreeData::renameTempPartAndReplaceImpl( MutableDataPartPtr & part, MergeTreeTransaction * txn, SimpleIncrement * increment, - Transaction * out_transaction, + Transaction & out_transaction, std::unique_lock & lock, DataPartsVector * out_covered_parts, MergeTreeDeduplicationLog * deduplication_log, std::string_view deduplication_token) { - if (out_transaction && &out_transaction->data != this) + if (&out_transaction.data != this) throw Exception("MergeTreeData::Transaction for one table cannot be used with another. It is a bug.", ErrorCodes::LOGICAL_ERROR); @@ -2901,46 +2901,10 @@ bool MergeTreeData::renameTempPartAndReplace( part->setState(DataPartState::PreActive); part->renameTo(part_name, true); - auto part_it = data_parts_indexes.insert(part).first; + data_parts_indexes.insert(part); - if (out_transaction) - { - chassert(out_transaction->txn == txn); - out_transaction->precommitted_parts.insert(part); - } - else - { - /// FIXME Transactions: it's not the best place for checking and setting removal_tid, - /// because it's too optimistic. 
We should lock removal_tid of covered parts at the beginning of operation. - MergeTreeTransaction::addNewPartAndRemoveCovered(shared_from_this(), part, covered_parts, txn); - - size_t reduce_bytes = 0; - size_t reduce_rows = 0; - size_t reduce_parts = 0; - auto current_time = time(nullptr); - for (const DataPartPtr & covered_part : covered_parts) - { - covered_part->remove_time.store(current_time, std::memory_order_relaxed); - modifyPartState(covered_part, DataPartState::Outdated); - removePartContributionToColumnAndSecondaryIndexSizes(covered_part); - reduce_bytes += covered_part->getBytesOnDisk(); - reduce_rows += covered_part->rows_count; - ++reduce_parts; - } - - modifyPartState(part_it, DataPartState::Active); - addPartContributionToColumnAndSecondaryIndexSizes(part); - - if (covered_parts.empty()) - updateObjectColumns(*part_it, lock); - else - resetObjectColumnsFromActiveParts(lock); - - ssize_t diff_bytes = part->getBytesOnDisk() - reduce_bytes; - ssize_t diff_rows = part->rows_count - reduce_rows; - ssize_t diff_parts = 1 - reduce_parts; - increaseDataVolume(diff_bytes, diff_rows, diff_parts); - } + chassert(out_transaction.txn == txn); + out_transaction.precommitted_parts.insert(part); auto part_in_memory = asInMemoryPart(part); if (part_in_memory && getSettings()->in_memory_parts_enable_wal) @@ -2959,17 +2923,24 @@ bool MergeTreeData::renameTempPartAndReplace( } MergeTreeData::DataPartsVector MergeTreeData::renameTempPartAndReplace( - MutableDataPartPtr & part, MergeTreeTransaction * txn, SimpleIncrement * increment, - Transaction * out_transaction, MergeTreeDeduplicationLog * deduplication_log) + MutableDataPartPtr & part, + MergeTreeTransaction * txn, + Transaction & out_transaction, + SimpleIncrement * increment, + MergeTreeDeduplicationLog * deduplication_log, + DataPartsLock * lock) { - if (out_transaction && &out_transaction->data != this) - throw Exception("MergeTreeData::Transaction for one table cannot be used with another. 
It is a bug.", - ErrorCodes::LOGICAL_ERROR); - DataPartsVector covered_parts; { - auto lock = lockParts(); - renameTempPartAndReplace(part, txn, increment, out_transaction, lock, &covered_parts, deduplication_log); + if (!lock) + { + auto part_lock = lockParts(); + renameTempPartAndReplaceImpl(part, txn, increment, out_transaction, part_lock, &covered_parts, deduplication_log); + } + else + { + renameTempPartAndReplaceImpl(part, txn, increment, out_transaction, *lock, &covered_parts, deduplication_log); + } } return covered_parts; } diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index 00a56de9142..1f0ee44518b 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -557,8 +557,8 @@ public: bool renameTempPartAndAdd( MutableDataPartPtr & part, MergeTreeTransaction * txn, + Transaction & transaction, SimpleIncrement * increment = nullptr, - Transaction * out_transaction = nullptr, MergeTreeDeduplicationLog * deduplication_log = nullptr, std::string_view deduplication_token = std::string_view()); @@ -566,20 +566,12 @@ public: /// Returns all parts covered by the added part (in ascending order). /// If out_transaction == nullptr, marks covered parts as Outdated. 
DataPartsVector renameTempPartAndReplace( - MutableDataPartPtr & part, MergeTreeTransaction * txn, SimpleIncrement * increment = nullptr, - Transaction * out_transaction = nullptr, MergeTreeDeduplicationLog * deduplication_log = nullptr); - - /// Low-level version of previous one, doesn't lock mutex - /// FIXME Transactions: remove add_to_txn flag, maybe merge MergeTreeTransaction and Transaction - bool renameTempPartAndReplace( MutableDataPartPtr & part, MergeTreeTransaction * txn, - SimpleIncrement * increment, - Transaction * out_transaction, - DataPartsLock & lock, - DataPartsVector * out_covered_parts = nullptr, + Transaction & out_transaction, + SimpleIncrement * increment = nullptr, MergeTreeDeduplicationLog * deduplication_log = nullptr, - std::string_view deduplication_token = std::string_view()); + DataPartsLock * lock = nullptr); /// Remove parts from working set immediately (without wait for background /// process). Transfer part state to temporary. Have very limited usage only @@ -1251,6 +1243,19 @@ protected: static void incrementMergedPartsProfileEvent(MergeTreeDataPartType type); private: + + /// Low-level version of previous one, doesn't lock mutex + /// FIXME Transactions: remove add_to_txn flag, maybe merge MergeTreeTransaction and Transaction + bool renameTempPartAndReplaceImpl( + MutableDataPartPtr & part, + MergeTreeTransaction * txn, + SimpleIncrement * increment, + Transaction & out_transaction, + DataPartsLock & lock, + DataPartsVector * out_covered_parts = nullptr, + MergeTreeDeduplicationLog * deduplication_log = nullptr, + std::string_view deduplication_token = std::string_view()); + /// RAII Wrapper for atomic work with currently moving parts /// Acquire them in constructor and remove them in destructor /// Uses data.currently_moving_parts_mutex diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index 77e3b574804..f2dc679364d 100644 --- 
a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -549,7 +549,7 @@ MergeTreeData::DataPartPtr MergeTreeDataMergerMutator::renameMergedTemporaryPart "but transactions were enabled for this table"); /// Rename new part, add to the set and remove original parts. - auto replaced_parts = data.renameTempPartAndReplace(new_data_part, txn.get(), nullptr, out_transaction); + auto replaced_parts = data.renameTempPartAndReplace(new_data_part, txn.get(), *out_transaction); /// Let's check that all original parts have been deleted and only them. if (replaced_parts.size() != parts.size()) diff --git a/src/Storages/MergeTree/MergeTreeSink.cpp b/src/Storages/MergeTree/MergeTreeSink.cpp index fbc916ddb2c..f02274ccaf5 100644 --- a/src/Storages/MergeTree/MergeTreeSink.cpp +++ b/src/Storages/MergeTree/MergeTreeSink.cpp @@ -133,9 +133,11 @@ void MergeTreeSink::finishDelayedChunk() auto & part = partition.temp_part.part; + MergeTreeData::Transaction transaction(storage, context->getCurrentTransaction().get()); /// Part can be deduplicated, so increment counters and add to part log only if it's really added - if (storage.renameTempPartAndAdd(part, context->getCurrentTransaction().get(), &storage.increment, nullptr, storage.getDeduplicationLog(), partition.block_dedup_token)) + if (storage.renameTempPartAndAdd(part, context->getCurrentTransaction().get(), transaction, &storage.increment, storage.getDeduplicationLog(), partition.block_dedup_token)) { + transaction.commit(); PartLog::addNewPart(storage.getContext(), part, partition.elapsed_ns); storage.incrementInsertedPartsProfileEvent(part->getType()); diff --git a/src/Storages/MergeTree/MutateFromLogEntryTask.cpp b/src/Storages/MergeTree/MutateFromLogEntryTask.cpp index 403d77165d4..b665f5407ef 100644 --- a/src/Storages/MergeTree/MutateFromLogEntryTask.cpp +++ b/src/Storages/MergeTree/MutateFromLogEntryTask.cpp @@ -171,7 +171,7 @@ bool 
MutateFromLogEntryTask::finalize(ReplicatedMergeMutateTaskBase::PartLogWrit { new_part = mutate_task->getFuture().get(); - storage.renameTempPartAndReplace(new_part, NO_TRANSACTION_RAW, nullptr, transaction_ptr.get()); + storage.renameTempPartAndReplace(new_part, NO_TRANSACTION_RAW, *transaction_ptr); try { diff --git a/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp b/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp index 80a33bfe0e3..643ddf8bf6b 100644 --- a/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp +++ b/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp @@ -83,8 +83,12 @@ bool MutatePlainMergeTreeTask::executeStep() new_part = mutate_task->getFuture().get(); + + MergeTreeData::Transaction transaction(storage, merge_mutate_entry->txn.get()); /// FIXME Transactions: it's too optimistic, better to lock parts before starting transaction - storage.renameTempPartAndReplace(new_part, merge_mutate_entry->txn.get()); + storage.renameTempPartAndReplace(new_part, merge_mutate_entry->txn.get(), transaction); + transaction.commit(); + storage.updateMutationEntriesErrors(future_part, true, ""); write_part_log({}); diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp index d217e16c830..f974ce9bd6b 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp @@ -476,7 +476,7 @@ void ReplicatedMergeTreeSink::commitPart( try { - renamed = storage.renameTempPartAndAdd(part, NO_TRANSACTION_RAW, nullptr, &transaction); + renamed = storage.renameTempPartAndAdd(part, NO_TRANSACTION_RAW, transaction); } catch (const Exception & e) { diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 8a265133d0e..24aac910b56 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -1542,7 +1542,10 @@ PartitionCommandsResultInfo StorageMergeTree::attachPartition( 
loaded_parts[i]->storeVersionMetadata(); String old_name = renamed_parts.old_and_new_names[i].old_name; - renameTempPartAndAdd(loaded_parts[i], local_context->getCurrentTransaction().get(), &increment); + MergeTreeData::Transaction transaction(*this, local_context->getCurrentTransaction().get()); + renameTempPartAndAdd(loaded_parts[i], local_context->getCurrentTransaction().get(), transaction, &increment); + transaction.commit(); + renamed_parts.old_and_new_names[i].old_name.clear(); results.push_back(PartitionCommandResultInfo{ @@ -1616,7 +1619,7 @@ void StorageMergeTree::replacePartitionFrom(const StoragePtr & source_table, con /// Populate transaction for (MutableDataPartPtr & part : dst_parts) - renameTempPartAndReplace(part, local_context->getCurrentTransaction().get(), &increment, &transaction, data_parts_lock); + renameTempPartAndReplace(part, local_context->getCurrentTransaction().get(), transaction, &increment, nullptr, &data_parts_lock); transaction.commit(&data_parts_lock); @@ -1694,7 +1697,7 @@ void StorageMergeTree::movePartitionToTable(const StoragePtr & dest_table, const DataPartsLock lock(mutex); for (MutableDataPartPtr & part : dst_parts) - dest_table_storage->renameTempPartAndReplace(part, local_context->getCurrentTransaction().get(), &dest_table_storage->increment, &transaction, lock); + dest_table_storage->renameTempPartAndReplace(part, local_context->getCurrentTransaction().get(), transaction, &dest_table_storage->increment, nullptr, &lock); removePartsFromWorkingSet(local_context->getCurrentTransaction().get(), src_parts, true, lock); transaction.commit(&lock); @@ -1787,7 +1790,11 @@ CheckResults StorageMergeTree::checkData(const ASTPtr & query, ContextPtr local_ void StorageMergeTree::attachRestoredParts(MutableDataPartsVector && parts) { for (auto part : parts) - renameTempPartAndAdd(part, NO_TRANSACTION_RAW, &increment); + { + MergeTreeData::Transaction transaction(*this, NO_TRANSACTION_RAW); + renameTempPartAndAdd(part, NO_TRANSACTION_RAW, 
transaction, &increment); + transaction.commit(); + } } diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index e93399918ef..36a199458a5 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -1657,7 +1657,7 @@ bool StorageReplicatedMergeTree::executeLogEntry(LogEntry & entry) Transaction transaction(*this, NO_TRANSACTION_RAW); part->version.setCreationTID(Tx::PrehistoricTID, nullptr); - renameTempPartAndReplace(part, NO_TRANSACTION_RAW, nullptr, &transaction); + renameTempPartAndReplace(part, NO_TRANSACTION_RAW, transaction); checkPartChecksumsAndCommit(transaction, part); writePartLog(PartLogElement::Type::NEW_PART, {}, 0 /** log entry is fake so we don't measure the time */, @@ -2342,7 +2342,7 @@ bool StorageReplicatedMergeTree::executeReplaceRange(const LogEntry & entry) Coordination::Requests ops; for (PartDescriptionPtr & part_desc : final_parts) { - renameTempPartAndReplace(part_desc->res_part, NO_TRANSACTION_RAW, nullptr, &transaction); + renameTempPartAndReplace(part_desc->res_part, NO_TRANSACTION_RAW, transaction); getCommitPartOps(ops, part_desc->res_part); lockSharedData(*part_desc->res_part, false, part_desc->hardlinked_files); @@ -4081,7 +4081,7 @@ bool StorageReplicatedMergeTree::fetchPart(const String & part_name, const Stora if (!to_detached) { Transaction transaction(*this, NO_TRANSACTION_RAW); - renameTempPartAndReplace(part, NO_TRANSACTION_RAW, nullptr, &transaction); + renameTempPartAndReplace(part, NO_TRANSACTION_RAW, transaction); replaced_parts = checkPartChecksumsAndCommit(transaction, part, hardlinked_files); @@ -6604,7 +6604,7 @@ void StorageReplicatedMergeTree::replacePartitionFrom( auto data_parts_lock = lockParts(); for (MutableDataPartPtr & part : dst_parts) - renameTempPartAndReplace(part, query_context->getCurrentTransaction().get(), nullptr, &transaction, data_parts_lock); + renameTempPartAndReplace(part, 
query_context->getCurrentTransaction().get(), transaction, nullptr, nullptr, &data_parts_lock); } for (size_t i = 0; i < dst_parts.size(); ++i) @@ -6841,7 +6841,7 @@ void StorageReplicatedMergeTree::movePartitionToTable(const StoragePtr & dest_ta DataPartsLock lock(mutex); for (MutableDataPartPtr & part : dst_parts) - dest_table_storage->renameTempPartAndReplace(part, query_context->getCurrentTransaction().get(), nullptr, &transaction, lock); + dest_table_storage->renameTempPartAndReplace(part, query_context->getCurrentTransaction().get(), transaction, nullptr, nullptr, &lock); for (size_t i = 0; i < dst_parts.size(); ++i) dest_table_storage->lockSharedData(*dst_parts[i], false, hardlinked_files_for_parts[i]); @@ -8020,7 +8020,7 @@ bool StorageReplicatedMergeTree::createEmptyPartInsteadOfLost(zkutil::ZooKeeperP try { MergeTreeData::Transaction transaction(*this, NO_TRANSACTION_RAW); - auto replaced_parts = renameTempPartAndReplace(new_data_part, NO_TRANSACTION_RAW, nullptr, &transaction); + auto replaced_parts = renameTempPartAndReplace(new_data_part, NO_TRANSACTION_RAW, transaction); if (!replaced_parts.empty()) { From af1a9d18abb1743911a42aa9791a88b62e23b0b2 Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 24 Jun 2022 13:34:00 +0200 Subject: [PATCH 058/101] Remove transaction argument --- .../MergeTree/MergeFromLogEntryTask.cpp | 2 +- .../MergeTree/MergePlainMergeTreeTask.cpp | 2 +- src/Storages/MergeTree/MergeTreeData.cpp | 21 ++++++++++--------- src/Storages/MergeTree/MergeTreeData.h | 5 +---- .../MergeTree/MergeTreeDataMergerMutator.cpp | 4 ++-- .../MergeTree/MergeTreeDataMergerMutator.h | 2 +- src/Storages/MergeTree/MergeTreeSink.cpp | 2 +- .../MergeTree/MutateFromLogEntryTask.cpp | 2 +- .../MergeTree/MutatePlainMergeTreeTask.cpp | 2 +- .../MergeTree/ReplicatedMergeTreeSink.cpp | 2 +- src/Storages/StorageMergeTree.cpp | 8 +++---- src/Storages/StorageReplicatedMergeTree.cpp | 12 +++++------ 12 files changed, 31 insertions(+), 33 deletions(-) diff --git 
a/src/Storages/MergeTree/MergeFromLogEntryTask.cpp b/src/Storages/MergeTree/MergeFromLogEntryTask.cpp index 048c460c549..4121262e7b5 100644 --- a/src/Storages/MergeTree/MergeFromLogEntryTask.cpp +++ b/src/Storages/MergeTree/MergeFromLogEntryTask.cpp @@ -264,7 +264,7 @@ bool MergeFromLogEntryTask::finalize(ReplicatedMergeMutateTaskBase::PartLogWrite /// Task is not needed merge_task.reset(); - storage.merger_mutator.renameMergedTemporaryPart(part, parts, NO_TRANSACTION_PTR, transaction_ptr.get()); + storage.merger_mutator.renameMergedTemporaryPart(part, parts, NO_TRANSACTION_PTR, *transaction_ptr); try { diff --git a/src/Storages/MergeTree/MergePlainMergeTreeTask.cpp b/src/Storages/MergeTree/MergePlainMergeTreeTask.cpp index 90f04eee019..cc5e87956a1 100644 --- a/src/Storages/MergeTree/MergePlainMergeTreeTask.cpp +++ b/src/Storages/MergeTree/MergePlainMergeTreeTask.cpp @@ -117,7 +117,7 @@ void MergePlainMergeTreeTask::finish() new_part = merge_task->getFuture().get(); MergeTreeData::Transaction transaction(storage, txn.get()); - storage.merger_mutator.renameMergedTemporaryPart(new_part, future_part->parts, txn, &transaction); + storage.merger_mutator.renameMergedTemporaryPart(new_part, future_part->parts, txn, transaction); transaction.commit(); write_part_log({}); diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 98dc099140a..15ee92a875e 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -2786,7 +2786,6 @@ MergeTreeData::DataPartsVector MergeTreeData::getActivePartsToReplace( bool MergeTreeData::renameTempPartAndAdd( MutableDataPartPtr & part, - MergeTreeTransaction * txn, Transaction & out_transaction, SimpleIncrement * increment, MergeTreeDeduplicationLog * deduplication_log, @@ -2799,7 +2798,7 @@ bool MergeTreeData::renameTempPartAndAdd( DataPartsVector covered_parts; { auto lock = lockParts(); - if (!renameTempPartAndReplaceImpl(part, txn, increment, 
out_transaction, lock, &covered_parts, deduplication_log, deduplication_token)) + if (!renameTempPartAndReplaceImpl(part, increment, out_transaction, lock, &covered_parts, deduplication_log, deduplication_token)) return false; } if (!covered_parts.empty()) @@ -2812,7 +2811,6 @@ bool MergeTreeData::renameTempPartAndAdd( bool MergeTreeData::renameTempPartAndReplaceImpl( MutableDataPartPtr & part, - MergeTreeTransaction * txn, SimpleIncrement * increment, Transaction & out_transaction, std::unique_lock & lock, @@ -2824,9 +2822,6 @@ bool MergeTreeData::renameTempPartAndReplaceImpl( throw Exception("MergeTreeData::Transaction for one table cannot be used with another. It is a bug.", ErrorCodes::LOGICAL_ERROR); - if (txn) - transactions_enabled.store(true); - part->assertState({DataPartState::Temporary}); MergeTreePartInfo part_info = part->info; @@ -2903,7 +2898,6 @@ bool MergeTreeData::renameTempPartAndReplaceImpl( data_parts_indexes.insert(part); - chassert(out_transaction.txn == txn); out_transaction.precommitted_parts.insert(part); auto part_in_memory = asInMemoryPart(part); @@ -2924,7 +2918,6 @@ bool MergeTreeData::renameTempPartAndReplaceImpl( MergeTreeData::DataPartsVector MergeTreeData::renameTempPartAndReplace( MutableDataPartPtr & part, - MergeTreeTransaction * txn, Transaction & out_transaction, SimpleIncrement * increment, MergeTreeDeduplicationLog * deduplication_log, @@ -2935,11 +2928,11 @@ MergeTreeData::DataPartsVector MergeTreeData::renameTempPartAndReplace( if (!lock) { auto part_lock = lockParts(); - renameTempPartAndReplaceImpl(part, txn, increment, out_transaction, part_lock, &covered_parts, deduplication_log); + renameTempPartAndReplaceImpl(part, increment, out_transaction, part_lock, &covered_parts, deduplication_log); } else { - renameTempPartAndReplaceImpl(part, txn, increment, out_transaction, *lock, &covered_parts, deduplication_log); + renameTempPartAndReplaceImpl(part, increment, out_transaction, *lock, &covered_parts, deduplication_log); } 
} return covered_parts; @@ -4850,6 +4843,14 @@ MergeTreeData::DataPartPtr MergeTreeData::getAnyPartInPartition( } +MergeTreeData::Transaction::Transaction(MergeTreeData & data_, MergeTreeTransaction * txn_) + : data(data_) + , txn(txn_) +{ + if (txn) + data.transactions_enabled.store(true); +} + void MergeTreeData::Transaction::rollbackPartsToTemporaryState() { if (!isEmpty()) diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index 1f0ee44518b..c69968b0162 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -252,7 +252,7 @@ public: class Transaction : private boost::noncopyable { public: - Transaction(MergeTreeData & data_, MergeTreeTransaction * txn_) : data(data_), txn(txn_) {} + Transaction(MergeTreeData & data_, MergeTreeTransaction * txn_); DataPartsVector commit(MergeTreeData::DataPartsLock * acquired_parts_lock = nullptr); @@ -556,7 +556,6 @@ public: /// Returns true if part was added. Returns false if part is covered by bigger part. bool renameTempPartAndAdd( MutableDataPartPtr & part, - MergeTreeTransaction * txn, Transaction & transaction, SimpleIncrement * increment = nullptr, MergeTreeDeduplicationLog * deduplication_log = nullptr, @@ -567,7 +566,6 @@ public: /// If out_transaction == nullptr, marks covered parts as Outdated. 
DataPartsVector renameTempPartAndReplace( MutableDataPartPtr & part, - MergeTreeTransaction * txn, Transaction & out_transaction, SimpleIncrement * increment = nullptr, MergeTreeDeduplicationLog * deduplication_log = nullptr, @@ -1248,7 +1246,6 @@ private: /// FIXME Transactions: remove add_to_txn flag, maybe merge MergeTreeTransaction and Transaction bool renameTempPartAndReplaceImpl( MutableDataPartPtr & part, - MergeTreeTransaction * txn, SimpleIncrement * increment, Transaction & out_transaction, DataPartsLock & lock, diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index f2dc679364d..c740489b76d 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -541,7 +541,7 @@ MergeTreeData::DataPartPtr MergeTreeDataMergerMutator::renameMergedTemporaryPart MergeTreeData::MutableDataPartPtr & new_data_part, const MergeTreeData::DataPartsVector & parts, const MergeTreeTransactionPtr & txn, - MergeTreeData::Transaction * out_transaction) + MergeTreeData::Transaction & out_transaction) { /// Some of source parts was possibly created in transaction, so non-transactional merge may break isolation. if (data.transactions_enabled.load(std::memory_order_relaxed) && !txn) @@ -549,7 +549,7 @@ MergeTreeData::DataPartPtr MergeTreeDataMergerMutator::renameMergedTemporaryPart "but transactions were enabled for this table"); /// Rename new part, add to the set and remove original parts. - auto replaced_parts = data.renameTempPartAndReplace(new_data_part, txn.get(), *out_transaction); + auto replaced_parts = data.renameTempPartAndReplace(new_data_part, out_transaction); /// Let's check that all original parts have been deleted and only them. 
if (replaced_parts.size() != parts.size()) diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.h b/src/Storages/MergeTree/MergeTreeDataMergerMutator.h index e3d59a3522f..587b6b26347 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.h +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.h @@ -133,7 +133,7 @@ public: MergeTreeData::MutableDataPartPtr & new_data_part, const MergeTreeData::DataPartsVector & parts, const MergeTreeTransactionPtr & txn, - MergeTreeData::Transaction * out_transaction = nullptr); + MergeTreeData::Transaction & out_transaction); /// The approximate amount of disk space needed for merge or mutation. With a surplus. diff --git a/src/Storages/MergeTree/MergeTreeSink.cpp b/src/Storages/MergeTree/MergeTreeSink.cpp index f02274ccaf5..52f6fc46d9c 100644 --- a/src/Storages/MergeTree/MergeTreeSink.cpp +++ b/src/Storages/MergeTree/MergeTreeSink.cpp @@ -135,7 +135,7 @@ void MergeTreeSink::finishDelayedChunk() MergeTreeData::Transaction transaction(storage, context->getCurrentTransaction().get()); /// Part can be deduplicated, so increment counters and add to part log only if it's really added - if (storage.renameTempPartAndAdd(part, context->getCurrentTransaction().get(), transaction, &storage.increment, storage.getDeduplicationLog(), partition.block_dedup_token)) + if (storage.renameTempPartAndAdd(part, transaction, &storage.increment, storage.getDeduplicationLog(), partition.block_dedup_token)) { transaction.commit(); PartLog::addNewPart(storage.getContext(), part, partition.elapsed_ns); diff --git a/src/Storages/MergeTree/MutateFromLogEntryTask.cpp b/src/Storages/MergeTree/MutateFromLogEntryTask.cpp index b665f5407ef..ac4de51bbd9 100644 --- a/src/Storages/MergeTree/MutateFromLogEntryTask.cpp +++ b/src/Storages/MergeTree/MutateFromLogEntryTask.cpp @@ -171,7 +171,7 @@ bool MutateFromLogEntryTask::finalize(ReplicatedMergeMutateTaskBase::PartLogWrit { new_part = mutate_task->getFuture().get(); - 
storage.renameTempPartAndReplace(new_part, NO_TRANSACTION_RAW, *transaction_ptr); + storage.renameTempPartAndReplace(new_part, *transaction_ptr); try { diff --git a/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp b/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp index 643ddf8bf6b..6214919337b 100644 --- a/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp +++ b/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp @@ -86,7 +86,7 @@ bool MutatePlainMergeTreeTask::executeStep() MergeTreeData::Transaction transaction(storage, merge_mutate_entry->txn.get()); /// FIXME Transactions: it's too optimistic, better to lock parts before starting transaction - storage.renameTempPartAndReplace(new_part, merge_mutate_entry->txn.get(), transaction); + storage.renameTempPartAndReplace(new_part, transaction); transaction.commit(); storage.updateMutationEntriesErrors(future_part, true, ""); diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp index f974ce9bd6b..d63c79b2006 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp @@ -476,7 +476,7 @@ void ReplicatedMergeTreeSink::commitPart( try { - renamed = storage.renameTempPartAndAdd(part, NO_TRANSACTION_RAW, transaction); + renamed = storage.renameTempPartAndAdd(part, transaction); } catch (const Exception & e) { diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 24aac910b56..f34ea34f347 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -1543,7 +1543,7 @@ PartitionCommandsResultInfo StorageMergeTree::attachPartition( String old_name = renamed_parts.old_and_new_names[i].old_name; MergeTreeData::Transaction transaction(*this, local_context->getCurrentTransaction().get()); - renameTempPartAndAdd(loaded_parts[i], local_context->getCurrentTransaction().get(), transaction, &increment); + renameTempPartAndAdd(loaded_parts[i], 
transaction, &increment); transaction.commit(); renamed_parts.old_and_new_names[i].old_name.clear(); @@ -1619,7 +1619,7 @@ void StorageMergeTree::replacePartitionFrom(const StoragePtr & source_table, con /// Populate transaction for (MutableDataPartPtr & part : dst_parts) - renameTempPartAndReplace(part, local_context->getCurrentTransaction().get(), transaction, &increment, nullptr, &data_parts_lock); + renameTempPartAndReplace(part, transaction, &increment, nullptr, &data_parts_lock); transaction.commit(&data_parts_lock); @@ -1697,7 +1697,7 @@ void StorageMergeTree::movePartitionToTable(const StoragePtr & dest_table, const DataPartsLock lock(mutex); for (MutableDataPartPtr & part : dst_parts) - dest_table_storage->renameTempPartAndReplace(part, local_context->getCurrentTransaction().get(), transaction, &dest_table_storage->increment, nullptr, &lock); + dest_table_storage->renameTempPartAndReplace(part, transaction, &dest_table_storage->increment, nullptr, &lock); removePartsFromWorkingSet(local_context->getCurrentTransaction().get(), src_parts, true, lock); transaction.commit(&lock); @@ -1792,7 +1792,7 @@ void StorageMergeTree::attachRestoredParts(MutableDataPartsVector && parts) for (auto part : parts) { MergeTreeData::Transaction transaction(*this, NO_TRANSACTION_RAW); - renameTempPartAndAdd(part, NO_TRANSACTION_RAW, transaction, &increment); + renameTempPartAndAdd(part, transaction, &increment); transaction.commit(); } } diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 36a199458a5..603ddc27ee0 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -1657,7 +1657,7 @@ bool StorageReplicatedMergeTree::executeLogEntry(LogEntry & entry) Transaction transaction(*this, NO_TRANSACTION_RAW); part->version.setCreationTID(Tx::PrehistoricTID, nullptr); - renameTempPartAndReplace(part, NO_TRANSACTION_RAW, transaction); + renameTempPartAndReplace(part, 
transaction); checkPartChecksumsAndCommit(transaction, part); writePartLog(PartLogElement::Type::NEW_PART, {}, 0 /** log entry is fake so we don't measure the time */, @@ -2342,7 +2342,7 @@ bool StorageReplicatedMergeTree::executeReplaceRange(const LogEntry & entry) Coordination::Requests ops; for (PartDescriptionPtr & part_desc : final_parts) { - renameTempPartAndReplace(part_desc->res_part, NO_TRANSACTION_RAW, transaction); + renameTempPartAndReplace(part_desc->res_part, transaction); getCommitPartOps(ops, part_desc->res_part); lockSharedData(*part_desc->res_part, false, part_desc->hardlinked_files); @@ -4081,7 +4081,7 @@ bool StorageReplicatedMergeTree::fetchPart(const String & part_name, const Stora if (!to_detached) { Transaction transaction(*this, NO_TRANSACTION_RAW); - renameTempPartAndReplace(part, NO_TRANSACTION_RAW, transaction); + renameTempPartAndReplace(part, transaction); replaced_parts = checkPartChecksumsAndCommit(transaction, part, hardlinked_files); @@ -6604,7 +6604,7 @@ void StorageReplicatedMergeTree::replacePartitionFrom( auto data_parts_lock = lockParts(); for (MutableDataPartPtr & part : dst_parts) - renameTempPartAndReplace(part, query_context->getCurrentTransaction().get(), transaction, nullptr, nullptr, &data_parts_lock); + renameTempPartAndReplace(part, transaction, nullptr, nullptr, &data_parts_lock); } for (size_t i = 0; i < dst_parts.size(); ++i) @@ -6841,7 +6841,7 @@ void StorageReplicatedMergeTree::movePartitionToTable(const StoragePtr & dest_ta DataPartsLock lock(mutex); for (MutableDataPartPtr & part : dst_parts) - dest_table_storage->renameTempPartAndReplace(part, query_context->getCurrentTransaction().get(), transaction, nullptr, nullptr, &lock); + dest_table_storage->renameTempPartAndReplace(part, transaction, nullptr, nullptr, &lock); for (size_t i = 0; i < dst_parts.size(); ++i) dest_table_storage->lockSharedData(*dst_parts[i], false, hardlinked_files_for_parts[i]); @@ -8020,7 +8020,7 @@ bool 
StorageReplicatedMergeTree::createEmptyPartInsteadOfLost(zkutil::ZooKeeperP try { MergeTreeData::Transaction transaction(*this, NO_TRANSACTION_RAW); - auto replaced_parts = renameTempPartAndReplace(new_data_part, NO_TRANSACTION_RAW, transaction); + auto replaced_parts = renameTempPartAndReplace(new_data_part, transaction); if (!replaced_parts.empty()) { From 36b810b07629bbd8ffafd45c25c81391849ff998 Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 24 Jun 2022 13:42:36 +0200 Subject: [PATCH 059/101] Remove unused parameter --- src/Storages/MergeTree/MergeTreeData.cpp | 5 ++--- src/Storages/MergeTree/MergeTreeData.h | 1 - src/Storages/StorageMergeTree.cpp | 4 ++-- src/Storages/StorageReplicatedMergeTree.cpp | 4 ++-- 4 files changed, 6 insertions(+), 8 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 15ee92a875e..fdff6179a19 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -2920,7 +2920,6 @@ MergeTreeData::DataPartsVector MergeTreeData::renameTempPartAndReplace( MutableDataPartPtr & part, Transaction & out_transaction, SimpleIncrement * increment, - MergeTreeDeduplicationLog * deduplication_log, DataPartsLock * lock) { DataPartsVector covered_parts; @@ -2928,11 +2927,11 @@ MergeTreeData::DataPartsVector MergeTreeData::renameTempPartAndReplace( if (!lock) { auto part_lock = lockParts(); - renameTempPartAndReplaceImpl(part, increment, out_transaction, part_lock, &covered_parts, deduplication_log); + renameTempPartAndReplaceImpl(part, increment, out_transaction, part_lock, &covered_parts); } else { - renameTempPartAndReplaceImpl(part, increment, out_transaction, *lock, &covered_parts, deduplication_log); + renameTempPartAndReplaceImpl(part, increment, out_transaction, *lock, &covered_parts); } } return covered_parts; diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index c69968b0162..75c809b5268 100644 --- 
a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -568,7 +568,6 @@ public: MutableDataPartPtr & part, Transaction & out_transaction, SimpleIncrement * increment = nullptr, - MergeTreeDeduplicationLog * deduplication_log = nullptr, DataPartsLock * lock = nullptr); /// Remove parts from working set immediately (without wait for background diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index f34ea34f347..baa438a6d28 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -1619,7 +1619,7 @@ void StorageMergeTree::replacePartitionFrom(const StoragePtr & source_table, con /// Populate transaction for (MutableDataPartPtr & part : dst_parts) - renameTempPartAndReplace(part, transaction, &increment, nullptr, &data_parts_lock); + renameTempPartAndReplace(part, transaction, &increment, &data_parts_lock); transaction.commit(&data_parts_lock); @@ -1697,7 +1697,7 @@ void StorageMergeTree::movePartitionToTable(const StoragePtr & dest_table, const DataPartsLock lock(mutex); for (MutableDataPartPtr & part : dst_parts) - dest_table_storage->renameTempPartAndReplace(part, transaction, &dest_table_storage->increment, nullptr, &lock); + dest_table_storage->renameTempPartAndReplace(part, transaction, &dest_table_storage->increment, &lock); removePartsFromWorkingSet(local_context->getCurrentTransaction().get(), src_parts, true, lock); transaction.commit(&lock); diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 603ddc27ee0..daf929dcfe6 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -6604,7 +6604,7 @@ void StorageReplicatedMergeTree::replacePartitionFrom( auto data_parts_lock = lockParts(); for (MutableDataPartPtr & part : dst_parts) - renameTempPartAndReplace(part, transaction, nullptr, nullptr, &data_parts_lock); + renameTempPartAndReplace(part, transaction, 
nullptr, &data_parts_lock); } for (size_t i = 0; i < dst_parts.size(); ++i) @@ -6841,7 +6841,7 @@ void StorageReplicatedMergeTree::movePartitionToTable(const StoragePtr & dest_ta DataPartsLock lock(mutex); for (MutableDataPartPtr & part : dst_parts) - dest_table_storage->renameTempPartAndReplace(part, transaction, nullptr, nullptr, &lock); + dest_table_storage->renameTempPartAndReplace(part, transaction, nullptr, &lock); for (size_t i = 0; i < dst_parts.size(); ++i) dest_table_storage->lockSharedData(*dst_parts[i], false, hardlinked_files_for_parts[i]); From 37310dc9df0292c165a2aef58102e422016799d0 Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 24 Jun 2022 14:10:15 +0200 Subject: [PATCH 060/101] Simpler --- src/Storages/MergeTree/MergeTreeData.cpp | 34 ++++++++++----------- src/Storages/MergeTree/MergeTreeData.h | 9 ++++-- src/Storages/StorageMergeTree.cpp | 8 ++--- src/Storages/StorageReplicatedMergeTree.cpp | 7 ++--- 4 files changed, 28 insertions(+), 30 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index fdff6179a19..14644817238 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -2791,10 +2791,6 @@ bool MergeTreeData::renameTempPartAndAdd( MergeTreeDeduplicationLog * deduplication_log, std::string_view deduplication_token) { - if (&out_transaction.data != this) - throw Exception("MergeTreeData::Transaction for one table cannot be used with another. 
It is a bug.", - ErrorCodes::LOGICAL_ERROR); - DataPartsVector covered_parts; { auto lock = lockParts(); @@ -2919,24 +2915,28 @@ bool MergeTreeData::renameTempPartAndReplaceImpl( MergeTreeData::DataPartsVector MergeTreeData::renameTempPartAndReplace( MutableDataPartPtr & part, Transaction & out_transaction, - SimpleIncrement * increment, - DataPartsLock * lock) + SimpleIncrement * increment) { + auto part_lock = lockParts(); + DataPartsVector covered_parts; - { - if (!lock) - { - auto part_lock = lockParts(); - renameTempPartAndReplaceImpl(part, increment, out_transaction, part_lock, &covered_parts); - } - else - { - renameTempPartAndReplaceImpl(part, increment, out_transaction, *lock, &covered_parts); - } - } + renameTempPartAndReplaceImpl(part, increment, out_transaction, part_lock, &covered_parts); + return covered_parts; } +void MergeTreeData::renameTempPartsAndReplace( + MutableDataPartsVector & parts, + Transaction & out_transaction, + DataPartsLock & lock, + SimpleIncrement * increment) +{ + for (auto & part : parts) + renameTempPartAndReplaceImpl(part, increment, out_transaction, lock); +} + + + void MergeTreeData::removePartsFromWorkingSet(MergeTreeTransaction * txn, const MergeTreeData::DataPartsVector & remove, bool clear_without_timeout, DataPartsLock & acquired_lock) { diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index 75c809b5268..5ce59e560a2 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -567,8 +567,13 @@ public: DataPartsVector renameTempPartAndReplace( MutableDataPartPtr & part, Transaction & out_transaction, - SimpleIncrement * increment = nullptr, - DataPartsLock * lock = nullptr); + SimpleIncrement * increment = nullptr); + + void renameTempPartsAndReplace( + MutableDataPartsVector & parts, + Transaction & out_transaction, + DataPartsLock & lock, + SimpleIncrement * increment = nullptr); /// Remove parts from working set immediately (without wait 
for background /// process). Transfer part state to temporary. Have very limited usage only diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index baa438a6d28..9bb1db105c1 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -1616,11 +1616,8 @@ void StorageMergeTree::replacePartitionFrom(const StoragePtr & source_table, con Transaction transaction(*this, local_context->getCurrentTransaction().get()); auto data_parts_lock = lockParts(); - /// Populate transaction - for (MutableDataPartPtr & part : dst_parts) - renameTempPartAndReplace(part, transaction, &increment, &data_parts_lock); - + renameTempPartsAndReplace(dst_parts, transaction, data_parts_lock, &increment); transaction.commit(&data_parts_lock); /// If it is REPLACE (not ATTACH), remove all parts which max_block_number less then min_block_number of the first new block @@ -1696,8 +1693,7 @@ void StorageMergeTree::movePartitionToTable(const StoragePtr & dest_table, const std::mutex mutex; DataPartsLock lock(mutex); - for (MutableDataPartPtr & part : dst_parts) - dest_table_storage->renameTempPartAndReplace(part, transaction, &dest_table_storage->increment, &lock); + dest_table_storage->renameTempPartsAndReplace(dst_parts, transaction, lock, &dest_table_storage->increment); removePartsFromWorkingSet(local_context->getCurrentTransaction().get(), src_parts, true, lock); transaction.commit(&lock); diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index daf929dcfe6..209beb0a5f6 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -6602,9 +6602,7 @@ void StorageReplicatedMergeTree::replacePartitionFrom( Transaction transaction(*this, NO_TRANSACTION_RAW); { auto data_parts_lock = lockParts(); - - for (MutableDataPartPtr & part : dst_parts) - renameTempPartAndReplace(part, transaction, nullptr, &data_parts_lock); + 
renameTempPartsAndReplace(dst_parts, transaction, data_parts_lock); } for (size_t i = 0; i < dst_parts.size(); ++i) @@ -6840,8 +6838,7 @@ void StorageReplicatedMergeTree::movePartitionToTable(const StoragePtr & dest_ta std::mutex mutex; DataPartsLock lock(mutex); - for (MutableDataPartPtr & part : dst_parts) - dest_table_storage->renameTempPartAndReplace(part, transaction, nullptr, &lock); + renameTempPartsAndReplace(dst_parts, transaction, lock); for (size_t i = 0; i < dst_parts.size(); ++i) dest_table_storage->lockSharedData(*dst_parts[i], false, hardlinked_files_for_parts[i]); From fdb4eb2552d26c5192b1b76739e28d11c46a4719 Mon Sep 17 00:00:00 2001 From: zvonand Date: Fri, 24 Jun 2022 17:23:01 +0500 Subject: [PATCH 061/101] fixed tests --- tests/queries/0_stateless/02337_base58.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02337_base58.sql b/tests/queries/0_stateless/02337_base58.sql index 68dac97a20b..34da1da4c86 100644 --- a/tests/queries/0_stateless/02337_base58.sql +++ b/tests/queries/0_stateless/02337_base58.sql @@ -14,4 +14,4 @@ SELECT base58Decode(encoded, 'bitcoin') FROM (SELECT base58Encode(val, 'bitcoin' SELECT base58Encode(val) FROM (select arrayJoin(['', 'f', 'fo', 'foo', 'foob', 'fooba', 'foobar']) val); SELECT base58Decode(val) FROM (select arrayJoin(['', '2m', '8o8', 'bQbp', '3csAg9', 'CZJRhmz', 't1Zv2yaZ']) val); -SELECT base58Decode('Why_not?'); -- { serverError 1001 } +SELECT base58Decode('Why_not?'); -- { serverError 36 } From 571410a1723aa31cc9ff420d2c71860ada8c5109 Mon Sep 17 00:00:00 2001 From: DanRoscigno Date: Fri, 24 Jun 2022 08:48:18 -0400 Subject: [PATCH 062/101] move settings to H3 level --- docs/en/engines/database-engines/index.md | 2 + .../database-engines/materialized-mysql.md | 50 ++++++++++++++----- .../materialized-postgresql.md | 14 +++--- 3 files changed, 47 insertions(+), 19 deletions(-) diff --git a/docs/en/engines/database-engines/index.md 
b/docs/en/engines/database-engines/index.md index 8e36aca695c..72689d29780 100644 --- a/docs/en/engines/database-engines/index.md +++ b/docs/en/engines/database-engines/index.md @@ -20,6 +20,8 @@ Here is a complete list of available database engines. Follow the links for more - [PostgreSQL](../../engines/database-engines/postgresql.md) +- [MaterializedMySQL](../../engines/database-engines/materialized-postgresql.md) + - [Replicated](../../engines/database-engines/replicated.md) - [SQLite](../../engines/database-engines/sqlite.md) diff --git a/docs/en/engines/database-engines/materialized-mysql.md b/docs/en/engines/database-engines/materialized-mysql.md index cbc40993da8..4b16d877210 100644 --- a/docs/en/engines/database-engines/materialized-mysql.md +++ b/docs/en/engines/database-engines/materialized-mysql.md @@ -26,14 +26,30 @@ ENGINE = MaterializedMySQL('host:port', ['database' | database], 'user', 'passwo - `user` — MySQL user. - `password` — User password. -**Engine Settings** +## Engine Settings -- `max_rows_in_buffer` — Maximum number of rows that data is allowed to cache in memory (for single table and the cache data unable to query). When this number is exceeded, the data will be materialized. Default: `65 505`. -- `max_bytes_in_buffer` — Maximum number of bytes that data is allowed to cache in memory (for single table and the cache data unable to query). When this number is exceeded, the data will be materialized. Default: `1 048 576`. -- `max_flush_data_time` — Maximum number of milliseconds that data is allowed to cache in memory (for database and the cache data unable to query). When this time is exceeded, the data will be materialized. Default: `1000`. -- `max_wait_time_when_mysql_unavailable` — Retry interval when MySQL is not available (milliseconds). Negative value disables retry. Default: `1000`. -- `allows_query_when_mysql_lost` — Allows to query a materialized table when MySQL is lost. Default: `0` (`false`). 
-- `materialized_mysql_tables_list` — a comma-separated list of mysql database tables, which will be replicated by MaterializedMySQL database engine. Default value: empty list — means whole tables will be replicated. +### max_rows_in_buffer + +`max_rows_in_buffer` — Maximum number of rows that data is allowed to cache in memory (for single table and the cache data unable to query). When this number is exceeded, the data will be materialized. Default: `65 505`. + +### max_bytes_in_buffer + +`max_bytes_in_buffer` — Maximum number of bytes that data is allowed to cache in memory (for single table and the cache data unable to query). When this number is exceeded, the data will be materialized. Default: `1 048 576`. + +### max_flush_data_time + +`max_flush_data_time` — Maximum number of milliseconds that data is allowed to cache in memory (for database and the cache data unable to query). When this time is exceeded, the data will be materialized. Default: `1000`. + +### max_wait_time_when_mysql_unavailable + +`max_wait_time_when_mysql_unavailable` — Retry interval when MySQL is not available (milliseconds). Negative value disables retry. Default: `1000`. + +### allows_query_when_mysql_lost +`allows_query_when_mysql_lost` — Allows to query a materialized table when MySQL is lost. Default: `0` (`false`). + +### materialized_mysql_tables_list + +`materialized_mysql_tables_list` — a comma-separated list of mysql database tables, which will be replicated by MaterializedMySQL database engine. Default value: empty list — means whole tables will be replicated. 
```sql CREATE DATABASE mysql ENGINE = MaterializedMySQL('localhost:3306', 'db', 'user', '***') @@ -42,12 +58,17 @@ CREATE DATABASE mysql ENGINE = MaterializedMySQL('localhost:3306', 'db', 'user', max_wait_time_when_mysql_unavailable=10000; ``` -**Settings on MySQL-server Side** +## Settings on MySQL-server Side For the correct work of `MaterializedMySQL`, there are few mandatory `MySQL`-side configuration settings that must be set: -- `default_authentication_plugin = mysql_native_password` since `MaterializedMySQL` can only authorize with this method. -- `gtid_mode = on` since GTID based logging is a mandatory for providing correct `MaterializedMySQL` replication. +### default_authentication_plugin + +`default_authentication_plugin = mysql_native_password` since `MaterializedMySQL` can only authorize with this method. + +### gtid_mode + +`gtid_mode = on` since GTID based logging is a mandatory for providing correct `MaterializedMySQL` replication. :::note While turning on `gtid_mode` you should also specify `enforce_gtid_consistency = on`. @@ -57,8 +78,13 @@ While turning on `gtid_mode` you should also specify `enforce_gtid_consistency = When working with the `MaterializedMySQL` database engine, [ReplacingMergeTree](../../engines/table-engines/mergetree-family/replacingmergetree.md) tables are used with virtual `_sign` and `_version` columns. -- `_version` — Transaction counter. Type [UInt64](../../sql-reference/data-types/int-uint.md). -- `_sign` — Deletion mark. Type [Int8](../../sql-reference/data-types/int-uint.md). Possible values: +### \_version + +`_version` — Transaction counter. Type [UInt64](../../sql-reference/data-types/int-uint.md). + +### \_sign + +`_sign` — Deletion mark. Type [Int8](../../sql-reference/data-types/int-uint.md). Possible values: - `1` — Row is not deleted, - `-1` — Row is deleted. 
diff --git a/docs/en/engines/database-engines/materialized-postgresql.md b/docs/en/engines/database-engines/materialized-postgresql.md index 66f918f01d6..dc05c58f092 100644 --- a/docs/en/engines/database-engines/materialized-postgresql.md +++ b/docs/en/engines/database-engines/materialized-postgresql.md @@ -150,21 +150,21 @@ Replication of [**TOAST**](https://www.postgresql.org/docs/9.5/storage-toast.htm ## Settings {#settings} -1. `materialized_postgresql_tables_list` {#materialized-postgresql-tables-list} +### `materialized_postgresql_tables_list` {#materialized-postgresql-tables-list} Sets a comma-separated list of PostgreSQL database tables, which will be replicated via [MaterializedPostgreSQL](../../engines/database-engines/materialized-postgresql.md) database engine. Default value: empty list — means whole PostgreSQL database will be replicated. -2. `materialized_postgresql_schema` {#materialized-postgresql-schema} +### `materialized_postgresql_schema` {#materialized-postgresql-schema} Default value: empty string. (Default schema is used) -3. `materialized_postgresql_schema_list` {#materialized-postgresql-schema-list} +### `materialized_postgresql_schema_list` {#materialized-postgresql-schema-list} Default value: empty list. (Default schema is used) -4. `materialized_postgresql_allow_automatic_update` {#materialized-postgresql-allow-automatic-update} +### `materialized_postgresql_allow_automatic_update` {#materialized-postgresql-allow-automatic-update} Do not use this setting before 22.1 version. @@ -177,7 +177,7 @@ Replication of [**TOAST**](https://www.postgresql.org/docs/9.5/storage-toast.htm Default value: `0`. -5. `materialized_postgresql_max_block_size` {#materialized-postgresql-max-block-size} +### `materialized_postgresql_max_block_size` {#materialized-postgresql-max-block-size} Sets the number of rows collected in memory before flushing data into PostgreSQL database table. 
@@ -187,11 +187,11 @@ Replication of [**TOAST**](https://www.postgresql.org/docs/9.5/storage-toast.htm Default value: `65536`. -6. `materialized_postgresql_replication_slot` {#materialized-postgresql-replication-slot} +### `materialized_postgresql_replication_slot` {#materialized-postgresql-replication-slot} A user-created replication slot. Must be used together with `materialized_postgresql_snapshot`. -7. `materialized_postgresql_snapshot` {#materialized-postgresql-snapshot} +### `materialized_postgresql_snapshot` {#materialized-postgresql-snapshot} A text string identifying a snapshot, from which [initial dump of PostgreSQL tables](../../engines/database-engines/materialized-postgresql.md) will be performed. Must be used together with `materialized_postgresql_replication_slot`. From c57e07cb49a8dd76843437f5664c491dc79f506e Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 24 Jun 2022 14:51:47 +0200 Subject: [PATCH 063/101] Style --- src/Storages/MergeTree/MergeTreeData.cpp | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 14644817238..dec99b2a5af 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -2809,7 +2809,7 @@ bool MergeTreeData::renameTempPartAndReplaceImpl( MutableDataPartPtr & part, SimpleIncrement * increment, Transaction & out_transaction, - std::unique_lock & lock, + DataPartsLock & lock, DataPartsVector * out_covered_parts, MergeTreeDeduplicationLog * deduplication_log, std::string_view deduplication_token) @@ -2820,9 +2820,6 @@ bool MergeTreeData::renameTempPartAndReplaceImpl( part->assertState({DataPartState::Temporary}); - MergeTreePartInfo part_info = part->info; - String part_name; - if (DataPartPtr existing_part_in_partition = getAnyPartInPartition(part->info.partition_id, lock)) { if (part->partition.value != existing_part_in_partition->partition.value) @@ -2832,6 +2829,9 @@ 
bool MergeTreeData::renameTempPartAndReplaceImpl( ErrorCodes::CORRUPTED_DATA); } + MergeTreePartInfo part_info = part->info; + String part_name; + /** It is important that obtaining new block number and adding that block to parts set is done atomically. * Otherwise there is race condition - merge of blocks could happen in interval that doesn't yet contain new part. */ @@ -2935,10 +2935,7 @@ void MergeTreeData::renameTempPartsAndReplace( renameTempPartAndReplaceImpl(part, increment, out_transaction, lock); } - - void MergeTreeData::removePartsFromWorkingSet(MergeTreeTransaction * txn, const MergeTreeData::DataPartsVector & remove, bool clear_without_timeout, DataPartsLock & acquired_lock) - { if (txn) transactions_enabled.store(true); From bd2f5eb96d6027b43f2479aa2f177f89eee81de8 Mon Sep 17 00:00:00 2001 From: DanRoscigno Date: Fri, 24 Jun 2022 08:57:51 -0400 Subject: [PATCH 064/101] move settings to H3 level --- docs/en/engines/database-engines/index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/engines/database-engines/index.md b/docs/en/engines/database-engines/index.md index 72689d29780..237112a5bee 100644 --- a/docs/en/engines/database-engines/index.md +++ b/docs/en/engines/database-engines/index.md @@ -20,7 +20,7 @@ Here is a complete list of available database engines. 
Follow the links for more - [PostgreSQL](../../engines/database-engines/postgresql.md) -- [MaterializedMySQL](../../engines/database-engines/materialized-postgresql.md) +- [MaterializedPostgreSQL](../../engines/database-engines/materialized-postgresql.md) - [Replicated](../../engines/database-engines/replicated.md) From 7517e1f4d5189f39fcde5f9ae276150dc1f94187 Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 24 Jun 2022 15:24:02 +0200 Subject: [PATCH 065/101] Remove some complexity --- src/Storages/MergeTree/MergeTreeData.cpp | 114 +++++++++++------------ src/Storages/MergeTree/MergeTreeData.h | 11 +-- 2 files changed, 61 insertions(+), 64 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index dec99b2a5af..c19804e3481 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -2794,7 +2794,33 @@ bool MergeTreeData::renameTempPartAndAdd( DataPartsVector covered_parts; { auto lock = lockParts(); - if (!renameTempPartAndReplaceImpl(part, increment, out_transaction, lock, &covered_parts, deduplication_log, deduplication_token)) + /** It is important that obtaining new block number and adding that block to parts set is done atomically. + * Otherwise there is race condition - merge of blocks could happen in interval that doesn't yet contain new part. + */ + if (increment) + { + part->info.min_block = part->info.max_block = increment->get(); + part->info.mutation = 0; + part->name = part->getNewName(part->info); + } + + /// Deduplication log used only from non-replicated MergeTree. Replicated + /// tables have their own mechanism. We try to deduplicate at such deep + /// level, because only here we know real part name which is required for + /// deduplication. 
+ if (deduplication_log) + { + const String block_id = part->getZeroLevelPartBlockID(deduplication_token); + auto res = deduplication_log->addPart(block_id, part->info); + if (!res.second) + { + ProfileEvents::increment(ProfileEvents::DuplicatedInsertedBlocks); + LOG_INFO(log, "Block with ID {} already exists as part {}; ignoring it", block_id, res.first.getPartName()); + return false; + } + } + + if (!renameTempPartAndReplaceImpl(part, out_transaction, lock, &covered_parts)) return false; } if (!covered_parts.empty()) @@ -2807,13 +2833,12 @@ bool MergeTreeData::renameTempPartAndAdd( bool MergeTreeData::renameTempPartAndReplaceImpl( MutableDataPartPtr & part, - SimpleIncrement * increment, Transaction & out_transaction, DataPartsLock & lock, - DataPartsVector * out_covered_parts, - MergeTreeDeduplicationLog * deduplication_log, - std::string_view deduplication_token) + DataPartsVector * out_covered_parts) { + LOG_TRACE(log, "Renaming temporary part {} to {}.", part->data_part_storage->getPartDirectory(), part->name); + if (&out_transaction.data != this) throw Exception("MergeTreeData::Transaction for one table cannot be used with another. It is a bug.", ErrorCodes::LOGICAL_ERROR); @@ -2829,24 +2854,7 @@ bool MergeTreeData::renameTempPartAndReplaceImpl( ErrorCodes::CORRUPTED_DATA); } - MergeTreePartInfo part_info = part->info; - String part_name; - - /** It is important that obtaining new block number and adding that block to parts set is done atomically. - * Otherwise there is race condition - merge of blocks could happen in interval that doesn't yet contain new part. 
- */ - if (increment) - { - part_info.min_block = part_info.max_block = increment->get(); - part_info.mutation = 0; /// it's equal to min_block by default - part_name = part->getNewName(part_info); - } - else /// Parts from ReplicatedMergeTree already have names - part_name = part->name; - - LOG_TRACE(log, "Renaming temporary part {} to {}.", part->data_part_storage->getPartDirectory(), part_name); - - if (auto it_duplicate = data_parts_by_info.find(part_info); it_duplicate != data_parts_by_info.end()) + if (auto it_duplicate = data_parts_by_info.find(part->info); it_duplicate != data_parts_by_info.end()) { String message = "Part " + (*it_duplicate)->getNameWithState() + " already exists"; @@ -2857,52 +2865,25 @@ bool MergeTreeData::renameTempPartAndReplaceImpl( } DataPartPtr covering_part; - DataPartsVector covered_parts = getActivePartsToReplace(part_info, part_name, covering_part, lock); + DataPartsVector covered_parts = getActivePartsToReplace(part->info, part->name, covering_part, lock); if (covering_part) { - LOG_WARNING(log, "Tried to add obsolete part {} covered by {}", part_name, covering_part->getNameWithState()); + LOG_WARNING(log, "Tried to add obsolete part {} covered by {}", part->name, covering_part->getNameWithState()); return false; } - /// Deduplication log used only from non-replicated MergeTree. Replicated - /// tables have their own mechanism. We try to deduplicate at such deep - /// level, because only here we know real part name which is required for - /// deduplication. - if (deduplication_log) - { - const String block_id = part->getZeroLevelPartBlockID(deduplication_token); - auto res = deduplication_log->addPart(block_id, part_info); - if (!res.second) - { - ProfileEvents::increment(ProfileEvents::DuplicatedInsertedBlocks); - LOG_INFO(log, "Block with ID {} already exists as part {}; ignoring it", block_id, res.first.getPartName()); - return false; - } - } - /// All checks are passed. Now we can rename the part on disk. 
/// So, we maintain invariant: if a non-temporary part in filesystem then it is in data_parts /// /// If out_transaction is null, we commit the part to the active set immediately, else add it to the transaction. - - part->name = part_name; - part->info = part_info; part->is_temp = false; part->setState(DataPartState::PreActive); - part->renameTo(part_name, true); + part->renameTo(part->name, true); data_parts_indexes.insert(part); - out_transaction.precommitted_parts.insert(part); - auto part_in_memory = asInMemoryPart(part); - if (part_in_memory && getSettings()->in_memory_parts_enable_wal) - { - auto wal = getWriteAheadLog(); - wal->addPart(part_in_memory); - } - if (out_covered_parts) { for (DataPartPtr & covered_part : covered_parts) @@ -2914,13 +2895,12 @@ bool MergeTreeData::renameTempPartAndReplaceImpl( MergeTreeData::DataPartsVector MergeTreeData::renameTempPartAndReplace( MutableDataPartPtr & part, - Transaction & out_transaction, - SimpleIncrement * increment) + Transaction & out_transaction) { auto part_lock = lockParts(); DataPartsVector covered_parts; - renameTempPartAndReplaceImpl(part, increment, out_transaction, part_lock, &covered_parts); + renameTempPartAndReplaceImpl(part, out_transaction, part_lock, &covered_parts); return covered_parts; } @@ -2932,7 +2912,15 @@ void MergeTreeData::renameTempPartsAndReplace( SimpleIncrement * increment) { for (auto & part : parts) - renameTempPartAndReplaceImpl(part, increment, out_transaction, lock); + { + if (increment) + { + part->info.min_block = part->info.max_block = increment->get(); + part->info.mutation = 0; + part->name = part->getNewName(part->info); + } + renameTempPartAndReplaceImpl(part, out_transaction, lock, nullptr); + } } void MergeTreeData::removePartsFromWorkingSet(MergeTreeTransaction * txn, const MergeTreeData::DataPartsVector & remove, bool clear_without_timeout, DataPartsLock & acquired_lock) @@ -4890,9 +4878,12 @@ MergeTreeData::DataPartsVector 
MergeTreeData::Transaction::commit(MergeTreeData: if (!isEmpty()) { + auto settings = data.getSettings(); + MergeTreeData::WriteAheadLogPtr wal; auto parts_lock = acquired_parts_lock ? MergeTreeData::DataPartsLock() : data.lockParts(); auto * owing_parts_lock = acquired_parts_lock ? acquired_parts_lock : &parts_lock; + if (txn) { for (const DataPartPtr & part : precommitted_parts) @@ -4917,6 +4908,15 @@ MergeTreeData::DataPartsVector MergeTreeData::Transaction::commit(MergeTreeData: for (const DataPartPtr & part : precommitted_parts) { + auto part_in_memory = asInMemoryPart(part); + if (part_in_memory && settings->in_memory_parts_enable_wal) + { + if (!wal) + wal = data.getWriteAheadLog(); + + wal->addPart(part_in_memory); + } + DataPartPtr covering_part; DataPartsVector covered_parts = data.getActivePartsToReplace(part->info, part->name, covering_part, *owing_parts_lock); if (covering_part) diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index 5ce59e560a2..7b144accb6f 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -566,8 +566,7 @@ public: /// If out_transaction == nullptr, marks covered parts as Outdated. 
DataPartsVector renameTempPartAndReplace( MutableDataPartPtr & part, - Transaction & out_transaction, - SimpleIncrement * increment = nullptr); + Transaction & out_transaction); void renameTempPartsAndReplace( MutableDataPartsVector & parts, @@ -1247,15 +1246,13 @@ protected: private: /// Low-level version of previous one, doesn't lock mutex - /// FIXME Transactions: remove add_to_txn flag, maybe merge MergeTreeTransaction and Transaction + /// FIXME Merge MergeTreeTransaction and Transaction bool renameTempPartAndReplaceImpl( MutableDataPartPtr & part, - SimpleIncrement * increment, Transaction & out_transaction, DataPartsLock & lock, - DataPartsVector * out_covered_parts = nullptr, - MergeTreeDeduplicationLog * deduplication_log = nullptr, - std::string_view deduplication_token = std::string_view()); + DataPartsVector * out_covered_parts); + /// RAII Wrapper for atomic work with currently moving parts /// Acquire them in constructor and remove them in destructor From 612c4571d5a47b04e1d246a3c3b368c7b4fe231e Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 24 Jun 2022 15:41:09 +0200 Subject: [PATCH 066/101] Split method into smaller --- src/Storages/MergeTree/MergeTreeData.cpp | 51 ++++++++++++++---------- src/Storages/MergeTree/MergeTreeData.h | 4 ++ 2 files changed, 35 insertions(+), 20 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index c19804e3481..ad99b60d4ea 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -2830,19 +2830,8 @@ bool MergeTreeData::renameTempPartAndAdd( return true; } - -bool MergeTreeData::renameTempPartAndReplaceImpl( - MutableDataPartPtr & part, - Transaction & out_transaction, - DataPartsLock & lock, - DataPartsVector * out_covered_parts) +void MergeTreeData::checkPartCanBeAddedToTable(MutableDataPartPtr & part, DataPartsLock & lock) const { - LOG_TRACE(log, "Renaming temporary part {} to {}.", 
part->data_part_storage->getPartDirectory(), part->name); - - if (&out_transaction.data != this) - throw Exception("MergeTreeData::Transaction for one table cannot be used with another. It is a bug.", - ErrorCodes::LOGICAL_ERROR); - part->assertState({DataPartState::Temporary}); if (DataPartPtr existing_part_in_partition = getAnyPartInPartition(part->info.partition_id, lock)) @@ -2863,6 +2852,33 @@ bool MergeTreeData::renameTempPartAndReplaceImpl( throw Exception(message, ErrorCodes::DUPLICATE_DATA_PART); } +} + +void MergeTreeData::preparePartForCommit(MutableDataPartPtr & part, Transaction & out_transaction, bool need_rename) +{ + part->is_temp = false; + part->setState(DataPartState::PreActive); + + if (need_rename) + part->renameTo(part->name, true); + + data_parts_indexes.insert(part); + out_transaction.precommitted_parts.insert(part); +} + +bool MergeTreeData::renameTempPartAndReplaceImpl( + MutableDataPartPtr & part, + Transaction & out_transaction, + DataPartsLock & lock, + DataPartsVector * out_covered_parts) +{ + LOG_TRACE(log, "Renaming temporary part {} to {}.", part->data_part_storage->getPartDirectory(), part->name); + + if (&out_transaction.data != this) + throw Exception("MergeTreeData::Transaction for one table cannot be used with another. It is a bug.", + ErrorCodes::LOGICAL_ERROR); + + checkPartCanBeAddedToTable(part, lock); DataPartPtr covering_part; DataPartsVector covered_parts = getActivePartsToReplace(part->info, part->name, covering_part, lock); @@ -2875,17 +2891,12 @@ bool MergeTreeData::renameTempPartAndReplaceImpl( /// All checks are passed. Now we can rename the part on disk. /// So, we maintain invariant: if a non-temporary part in filesystem then it is in data_parts - /// - /// If out_transaction is null, we commit the part to the active set immediately, else add it to the transaction. 
- part->is_temp = false; - part->setState(DataPartState::PreActive); - part->renameTo(part->name, true); - - data_parts_indexes.insert(part); - out_transaction.precommitted_parts.insert(part); + preparePartForCommit(part, out_transaction, /* need_rename = */ true); if (out_covered_parts) { + out_covered_parts->reserve(covered_parts.size()); + for (DataPartPtr & covered_part : covered_parts) out_covered_parts->emplace_back(std::move(covered_part)); } diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index 7b144accb6f..851504c38f0 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -1245,6 +1245,10 @@ protected: private: + void checkPartCanBeAddedToTable(MutableDataPartPtr & part, DataPartsLock & lock) const; + + void preparePartForCommit(MutableDataPartPtr & part, Transaction & out_transaction, bool need_rename); + /// Low-level version of previous one, doesn't lock mutex /// FIXME Merge MergeTreeTransaction and Transaction bool renameTempPartAndReplaceImpl( From 3bb765ae1fcfcb0687342ad0c93c4ab868e62cbb Mon Sep 17 00:00:00 2001 From: xinhuitian Date: Fri, 24 Jun 2022 22:24:48 +0800 Subject: [PATCH 067/101] fix some wrong titles and links in alter docs --- .../sql-reference/statements/alter/column.md | 12 +++++----- .../statements/alter/partition.md | 24 +++++++++---------- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/docs/en/sql-reference/statements/alter/column.md b/docs/en/sql-reference/statements/alter/column.md index 07266eb09a3..2a5e36eaa00 100644 --- a/docs/en/sql-reference/statements/alter/column.md +++ b/docs/en/sql-reference/statements/alter/column.md @@ -18,12 +18,12 @@ Each action is an operation on a column. The following actions are supported: -- [ADD COLUMN](#alter_add-column) — Adds a new column to the table. -- [DROP COLUMN](#alter_drop-column) — Deletes the column. -- [RENAME COLUMN](#alter_rename-column) — Renames an existing column. 
-- [CLEAR COLUMN](#alter_clear-column) — Resets column values. -- [COMMENT COLUMN](#alter_comment-column) — Adds a text comment to the column. -- [MODIFY COLUMN](#alter_modify-column) — Changes column’s type, default expression and TTL. +- [ADD COLUMN](#add-column) — Adds a new column to the table. +- [DROP COLUMN](#drop-column) — Deletes the column. +- [RENAME COLUMN](#rename-column) — Renames an existing column. +- [CLEAR COLUMN](#clear-column) — Resets column values. +- [COMMENT COLUMN](#comment-column) — Adds a text comment to the column. +- [MODIFY COLUMN](#modify-column) — Changes column’s type, default expression and TTL. - [MODIFY COLUMN REMOVE](#modify-remove) — Removes one of the column properties. - [MATERIALIZE COLUMN](#materialize-column) — Materializes the column in the parts where the column is missing. diff --git a/docs/en/sql-reference/statements/alter/partition.md b/docs/en/sql-reference/statements/alter/partition.md index 75c80add9b7..27178c91de8 100644 --- a/docs/en/sql-reference/statements/alter/partition.md +++ b/docs/en/sql-reference/statements/alter/partition.md @@ -7,18 +7,18 @@ sidebar_label: PARTITION The following operations with [partitions](../../../engines/table-engines/mergetree-family/custom-partitioning-key.md) are available: -- [DETACH PARTITION](#alter_detach-partition) — Moves a partition to the `detached` directory and forget it. -- [DROP PARTITION](#alter_drop-partition) — Deletes a partition. -- [ATTACH PART\|PARTITION](#alter_attach-partition) — Adds a part or partition from the `detached` directory to the table. -- [ATTACH PARTITION FROM](#alter_attach-partition-from) — Copies the data partition from one table to another and adds. -- [REPLACE PARTITION](#alter_replace-partition) — Copies the data partition from one table to another and replaces. -- [MOVE PARTITION TO TABLE](#alter_move_to_table-partition) — Moves the data partition from one table to another. 
-- [CLEAR COLUMN IN PARTITION](#alter_clear-column-partition) — Resets the value of a specified column in a partition.
-- [CLEAR INDEX IN PARTITION](#alter_clear-index-partition) — Resets the specified secondary index in a partition.
-- [FREEZE PARTITION](#alter_freeze-partition) — Creates a backup of a partition.
-- [UNFREEZE PARTITION](#alter_unfreeze-partition) — Removes a backup of a partition.
-- [FETCH PARTITION\|PART](#alter_fetch-partition) — Downloads a part or partition from another server.
-- [MOVE PARTITION\|PART](#alter_move-partition) — Move partition/data part to another disk or volume.
+- [DETACH PARTITION\|PART](#detach-partitionpart) — Moves a partition or part to the `detached` directory and forgets it.
+- [DROP PARTITION\|PART](#drop-partitionpart) — Deletes a partition or part.
+- [ATTACH PARTITION\|PART](#attach-partitionpart) — Adds a partition or part from the `detached` directory to the table.
+- [ATTACH PARTITION FROM](#attach-partition-from) — Copies the data partition from one table to another and adds.
+- [REPLACE PARTITION](#replace-partition) — Copies the data partition from one table to another and replaces.
+- [MOVE PARTITION TO TABLE](#move_to_table-partition) — Moves the data partition from one table to another.
+- [CLEAR COLUMN IN PARTITION](#clear-column-partition) — Resets the value of a specified column in a partition.
+- [CLEAR INDEX IN PARTITION](#clear-index-partition) — Resets the specified secondary index in a partition.
+- [FREEZE PARTITION](#freeze-partition) — Creates a backup of a partition.
+- [UNFREEZE PARTITION](#unfreeze-partition) — Removes a backup of a partition.
+- [FETCH PARTITION\|PART](#fetch-partition) — Downloads a part or partition from another server.
+- [MOVE PARTITION\|PART](#move-partition) — Move partition/data part to another disk or volume.
 - [UPDATE IN PARTITION](#update-in-partition) — Update data inside the partition by condition.
- [DELETE IN PARTITION](#delete-in-partition) — Delete data inside the partition by condition. From 848ae7b1304f5b1389ed7c812f47edb130d1112b Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Fri, 24 Jun 2022 16:49:48 +0200 Subject: [PATCH 068/101] Update docker-compose to try get rid of v1 errors --- docker/test/integration/runner/Dockerfile | 2 +- docker/test/testflows/runner/Dockerfile | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docker/test/integration/runner/Dockerfile b/docker/test/integration/runner/Dockerfile index 57e4dfdeda1..80a2158b17d 100644 --- a/docker/test/integration/runner/Dockerfile +++ b/docker/test/integration/runner/Dockerfile @@ -67,7 +67,7 @@ RUN python3 -m pip install \ dict2xml \ dicttoxml \ docker \ - docker-compose==1.28.2 \ + docker-compose==1.29.2 \ grpcio \ grpcio-tools \ kafka-python \ diff --git a/docker/test/testflows/runner/Dockerfile b/docker/test/testflows/runner/Dockerfile index fbff6fd5e97..bfc3ed5e39f 100644 --- a/docker/test/testflows/runner/Dockerfile +++ b/docker/test/testflows/runner/Dockerfile @@ -38,7 +38,7 @@ RUN apt-get update \ ENV TZ=Europe/Moscow RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone -RUN pip3 install urllib3 testflows==1.7.20 docker-compose==1.29.1 docker==5.0.0 dicttoxml kazoo tzlocal==2.1 pytz python-dateutil numpy +RUN pip3 install urllib3 testflows==1.7.20 docker-compose==1.29.2 docker==5.0.0 dicttoxml kazoo tzlocal==2.1 pytz python-dateutil numpy ENV DOCKER_CHANNEL stable ENV DOCKER_VERSION 20.10.6 From 6ef534c86460b4e658be8fe0d3fadb499e3c386e Mon Sep 17 00:00:00 2001 From: DanRoscigno Date: Fri, 24 Jun 2022 11:13:15 -0400 Subject: [PATCH 069/101] move settings to H3 level --- .../mergetree-family/aggregatingmergetree.md | 6 +- .../mergetree-family/collapsingmergetree.md | 10 +- .../mergetree-family/graphitemergetree.md | 17 ++- .../mergetree-family/mergetree.md | 123 +++++++++++++----- .../mergetree-family/replacingmergetree.md | 8 +- 
.../mergetree-family/replication.md | 16 ++- .../mergetree-family/summingmergetree.md | 8 +- .../versionedcollapsingmergetree.md | 12 +- 8 files changed, 142 insertions(+), 58 deletions(-) diff --git a/docs/en/engines/table-engines/mergetree-family/aggregatingmergetree.md b/docs/en/engines/table-engines/mergetree-family/aggregatingmergetree.md index 5c3143c6c18..b2eea820139 100644 --- a/docs/en/engines/table-engines/mergetree-family/aggregatingmergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/aggregatingmergetree.md @@ -11,8 +11,8 @@ You can use `AggregatingMergeTree` tables for incremental data aggregation, incl The engine processes all columns with the following types: -- [AggregateFunction](../../../sql-reference/data-types/aggregatefunction.md) -- [SimpleAggregateFunction](../../../sql-reference/data-types/simpleaggregatefunction.md) +## [AggregateFunction](../../../sql-reference/data-types/aggregatefunction.md) +## [SimpleAggregateFunction](../../../sql-reference/data-types/simpleaggregatefunction.md) It is appropriate to use `AggregatingMergeTree` if it reduces the number of rows by orders. @@ -36,7 +36,7 @@ For a description of request parameters, see [request description](../../../sql- **Query clauses** -When creating a `AggregatingMergeTree` table the same [clauses](../../../engines/table-engines/mergetree-family/mergetree.md) are required, as when creating a `MergeTree` table. +When creating an `AggregatingMergeTree` table the same [clauses](../../../engines/table-engines/mergetree-family/mergetree.md) are required, as when creating a `MergeTree` table.

diff --git a/docs/en/engines/table-engines/mergetree-family/collapsingmergetree.md b/docs/en/engines/table-engines/mergetree-family/collapsingmergetree.md index afe323441ab..1b37e20d0da 100644 --- a/docs/en/engines/table-engines/mergetree-family/collapsingmergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/collapsingmergetree.md @@ -7,7 +7,7 @@ sidebar_label: CollapsingMergeTree The engine inherits from [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md) and adds the logic of rows collapsing to data parts merge algorithm. -`CollapsingMergeTree` asynchronously deletes (collapses) pairs of rows if all of the fields in a sorting key (`ORDER BY`) are equivalent excepting the particular field `Sign` which can have `1` and `-1` values. Rows without a pair are kept. For more details see the [Collapsing](#table_engine-collapsingmergetree-collapsing) section of the document. +`CollapsingMergeTree` asynchronously deletes (collapses) pairs of rows if all of the fields in a sorting key (`ORDER BY`) are equivalent except the particular field `Sign`, which can have `1` and `-1` values. Rows without a pair are kept. For more details see the [Collapsing](#table_engine-collapsingmergetree-collapsing) section of the document. The engine may significantly reduce the volume of storage and increase the efficiency of `SELECT` query as a consequence. @@ -28,13 +28,15 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] For a description of query parameters, see [query description](../../../sql-reference/statements/create/table.md). -**CollapsingMergeTree Parameters** +## CollapsingMergeTree Parameters -- `sign` — Name of the column with the type of row: `1` is a “state” row, `-1` is a “cancel” row. +### sign + +`sign` — Name of the column with the type of row: `1` is a “state” row, `-1` is a “cancel” row. Column data type — `Int8`. 
-**Query clauses** +## Query clauses When creating a `CollapsingMergeTree` table, the same [query clauses](../../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) are required, as when creating a `MergeTree` table. diff --git a/docs/en/engines/table-engines/mergetree-family/graphitemergetree.md b/docs/en/engines/table-engines/mergetree-family/graphitemergetree.md index c1011e69ba6..9062dd3c423 100644 --- a/docs/en/engines/table-engines/mergetree-family/graphitemergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/graphitemergetree.md @@ -87,10 +87,18 @@ Rollup configuration structure: ### Required Columns {#required-columns} -- `path_column_name` — The name of the column storing the metric name (Graphite sensor). Default value: `Path`. -- `time_column_name` — The name of the column storing the time of measuring the metric. Default value: `Time`. -- `value_column_name` — The name of the column storing the value of the metric at the time set in `time_column_name`. Default value: `Value`. -- `version_column_name` — The name of the column storing the version of the metric. Default value: `Timestamp`. +#### path_column_name + +`path_column_name` — The name of the column storing the metric name (Graphite sensor). Default value: `Path`. + +#### time_column_name +`time_column_name` — The name of the column storing the time of measuring the metric. Default value: `Time`. + +#### value_column_name +`value_column_name` — The name of the column storing the value of the metric at the time set in `time_column_name`. Default value: `Value`. + +#### version_column_name +`version_column_name` — The name of the column storing the version of the metric. Default value: `Timestamp`. ### Patterns {#patterns} @@ -254,7 +262,6 @@ Valid values: ``` - :::warning Data rollup is performed during merges. 
Usually, for old partitions, merges are not started, so for rollup it is necessary to trigger an unscheduled merge using [optimize](../../../sql-reference/statements/optimize.md). Or use additional tools, for example [graphite-ch-optimizer](https://github.com/innogames/graphite-ch-optimizer). ::: diff --git a/docs/en/engines/table-engines/mergetree-family/mergetree.md b/docs/en/engines/table-engines/mergetree-family/mergetree.md index 15f66d2695f..103272fb250 100644 --- a/docs/en/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md @@ -60,7 +60,11 @@ For a description of parameters, see the [CREATE query description](../../../sql ### Query Clauses {#mergetree-query-clauses} -- `ENGINE` — Name and parameters of the engine. `ENGINE = MergeTree()`. The `MergeTree` engine does not have parameters. +#### ENGINE + +`ENGINE` — Name and parameters of the engine. `ENGINE = MergeTree()`. The `MergeTree` engine does not have parameters. + +#### - `ORDER BY` — The sorting key. @@ -70,18 +74,23 @@ For a description of parameters, see the [CREATE query description](../../../sql Use the `ORDER BY tuple()` syntax, if you do not need sorting. See [Selecting the Primary Key](#selecting-the-primary-key). +#### + - `PARTITION BY` — The [partitioning key](../../../engines/table-engines/mergetree-family/custom-partitioning-key.md). Optional. In most cases you don't need partition key, and in most other cases you don't need partition key more granular than by months. Partitioning does not speed up queries (in contrast to the ORDER BY expression). You should never use too granular partitioning. Don't partition your data by client identifiers or names (instead make client identifier or name the first column in the ORDER BY expression). For partitioning by month, use the `toYYYYMM(date_column)` expression, where `date_column` is a column with a date of the type [Date](../../../sql-reference/data-types/date.md). 
The partition names here have the `"YYYYMM"` format. +#### - `PRIMARY KEY` — The primary key if it [differs from the sorting key](#choosing-a-primary-key-that-differs-from-the-sorting-key). Optional. By default the primary key is the same as the sorting key (which is specified by the `ORDER BY` clause). Thus in most cases it is unnecessary to specify a separate `PRIMARY KEY` clause. +#### - `SAMPLE BY` — An expression for sampling. Optional. If a sampling expression is used, the primary key must contain it. The result of a sampling expression must be an unsigned integer. Example: `SAMPLE BY intHash32(UserID) ORDER BY (CounterID, EventDate, intHash32(UserID))`. +#### - `TTL` — A list of rules specifying storage duration of rows and defining logic of automatic parts movement [between disks and volumes](#table_engine-mergetree-multiple-volumes). Optional. Expression must have one `Date` or `DateTime` column as a result. Example: @@ -91,26 +100,76 @@ For a description of parameters, see the [CREATE query description](../../../sql For more details, see [TTL for columns and tables](#table_engine-mergetree-ttl) -- `SETTINGS` — Additional parameters that control the behavior of the `MergeTree` (optional): +### SETTINGS +Additional parameters that control the behavior of the `MergeTree` (optional): - - `index_granularity` — Maximum number of data rows between the marks of an index. Default value: 8192. See [Data Storage](#mergetree-data-storage). - - `index_granularity_bytes` — Maximum size of data granules in bytes. Default value: 10Mb. To restrict the granule size only by number of rows, set to 0 (not recommended). See [Data Storage](#mergetree-data-storage). - - `min_index_granularity_bytes` — Min allowed size of data granules in bytes. Default value: 1024b. To provide a safeguard against accidentally creating tables with very low index_granularity_bytes. See [Data Storage](#mergetree-data-storage). 
- - `enable_mixed_granularity_parts` — Enables or disables transitioning to control the granule size with the `index_granularity_bytes` setting. Before version 19.11, there was only the `index_granularity` setting for restricting granule size. The `index_granularity_bytes` setting improves ClickHouse performance when selecting data from tables with big rows (tens and hundreds of megabytes). If you have tables with big rows, you can enable this setting for the tables to improve the efficiency of `SELECT` queries. - - `use_minimalistic_part_header_in_zookeeper` — Storage method of the data parts headers in ZooKeeper. If `use_minimalistic_part_header_in_zookeeper=1`, then ZooKeeper stores less data. For more information, see the [setting description](../../../operations/server-configuration-parameters/settings.md#server-settings-use_minimalistic_part_header_in_zookeeper) in “Server configuration parameters”. - - `min_merge_bytes_to_use_direct_io` — The minimum data volume for merge operation that is required for using direct I/O access to the storage disk. When merging data parts, ClickHouse calculates the total storage volume of all the data to be merged. If the volume exceeds `min_merge_bytes_to_use_direct_io` bytes, ClickHouse reads and writes the data to the storage disk using the direct I/O interface (`O_DIRECT` option). If `min_merge_bytes_to_use_direct_io = 0`, then direct I/O is disabled. Default value: `10 * 1024 * 1024 * 1024` bytes. +#### index_granularity + +`index_granularity` — Maximum number of data rows between the marks of an index. Default value: 8192. See [Data Storage](#mergetree-data-storage). + +#### index_granularity_bytes + +`index_granularity_bytes` — Maximum size of data granules in bytes. Default value: 10Mb. To restrict the granule size only by number of rows, set to 0 (not recommended). See [Data Storage](#mergetree-data-storage). 
+ +#### min_index_granularity_bytes + +`min_index_granularity_bytes` — Min allowed size of data granules in bytes. Default value: 1024b. To provide a safeguard against accidentally creating tables with very low index_granularity_bytes. See [Data Storage](#mergetree-data-storage). + +#### enable_mixed_granularity_parts + +`enable_mixed_granularity_parts` — Enables or disables transitioning to control the granule size with the `index_granularity_bytes` setting. Before version 19.11, there was only the `index_granularity` setting for restricting granule size. The `index_granularity_bytes` setting improves ClickHouse performance when selecting data from tables with big rows (tens and hundreds of megabytes). If you have tables with big rows, you can enable this setting for the tables to improve the efficiency of `SELECT` queries. + +#### use_minimalistic_part_header_in_zookeeper + +`use_minimalistic_part_header_in_zookeeper` — Storage method of the data parts headers in ZooKeeper. If `use_minimalistic_part_header_in_zookeeper=1`, then ZooKeeper stores less data. For more information, see the [setting description](../../../operations/server-configuration-parameters/settings.md#server-settings-use_minimalistic_part_header_in_zookeeper) in “Server configuration parameters”. + +#### min_merge_bytes_to_use_direct_io + +`min_merge_bytes_to_use_direct_io` — The minimum data volume for merge operation that is required for using direct I/O access to the storage disk. When merging data parts, ClickHouse calculates the total storage volume of all the data to be merged. If the volume exceeds `min_merge_bytes_to_use_direct_io` bytes, ClickHouse reads and writes the data to the storage disk using the direct I/O interface (`O_DIRECT` option). If `min_merge_bytes_to_use_direct_io = 0`, then direct I/O is disabled. Default value: `10 * 1024 * 1024 * 1024` bytes. - - `merge_with_ttl_timeout` — Minimum delay in seconds before repeating a merge with delete TTL. 
Default value: `14400` seconds (4 hours). - - `merge_with_recompression_ttl_timeout` — Minimum delay in seconds before repeating a merge with recompression TTL. Default value: `14400` seconds (4 hours). - - `try_fetch_recompressed_part_timeout` — Timeout (in seconds) before starting merge with recompression. During this time ClickHouse tries to fetch recompressed part from replica which assigned this merge with recompression. Default value: `7200` seconds (2 hours). - - `write_final_mark` — Enables or disables writing the final index mark at the end of data part (after the last byte). Default value: 1. Don’t turn it off. - - `merge_max_block_size` — Maximum number of rows in block for merge operations. Default value: 8192. - - `storage_policy` — Storage policy. See [Using Multiple Block Devices for Data Storage](#table_engine-mergetree-multiple-volumes). - - `min_bytes_for_wide_part`, `min_rows_for_wide_part` — Minimum number of bytes/rows in a data part that can be stored in `Wide` format. You can set one, both or none of these settings. See [Data Storage](#mergetree-data-storage). - - `max_parts_in_total` — Maximum number of parts in all partitions. - - `max_compress_block_size` — Maximum size of blocks of uncompressed data before compressing for writing to a table. You can also specify this setting in the global settings (see [max_compress_block_size](../../../operations/settings/settings.md#max-compress-block-size) setting). The value specified when table is created overrides the global value for this setting. - - `min_compress_block_size` — Minimum size of blocks of uncompressed data required for compression when writing the next mark. You can also specify this setting in the global settings (see [min_compress_block_size](../../../operations/settings/settings.md#min-compress-block-size) setting). The value specified when table is created overrides the global value for this setting. 
- - `max_partitions_to_read` — Limits the maximum number of partitions that can be accessed in one query. You can also specify setting [max_partitions_to_read](../../../operations/settings/merge-tree-settings.md#max-partitions-to-read) in the global setting. + +#### merge_with_ttl_timeout + +`merge_with_ttl_timeout` — Minimum delay in seconds before repeating a merge with delete TTL. Default value: `14400` seconds (4 hours). +#### merge_with_recompression_ttl_timeout + +`merge_with_recompression_ttl_timeout` — Minimum delay in seconds before repeating a merge with recompression TTL. Default value: `14400` seconds (4 hours). + +#### try_fetch_recompressed_part_timeout + +`try_fetch_recompressed_part_timeout` — Timeout (in seconds) before starting merge with recompression. During this time ClickHouse tries to fetch recompressed part from replica which assigned this merge with recompression. Default value: `7200` seconds (2 hours). + +#### write_final_mark + +`write_final_mark` — Enables or disables writing the final index mark at the end of data part (after the last byte). Default value: 1. Don’t turn it off. + +#### merge_max_block_size + +`merge_max_block_size` — Maximum number of rows in block for merge operations. Default value: 8192. + +#### storage_policy + +`storage_policy` — Storage policy. See [Using Multiple Block Devices for Data Storage](#table_engine-mergetree-multiple-volumes). + +#### min_bytes_for_wide_part + +`min_bytes_for_wide_part`, `min_rows_for_wide_part` — Minimum number of bytes/rows in a data part that can be stored in `Wide` format. You can set one, both or none of these settings. See [Data Storage](#mergetree-data-storage). + +#### max_parts_in_total + +`max_parts_in_total` — Maximum number of parts in all partitions. + +#### max_compress_block_size + +`max_compress_block_size` — Maximum size of blocks of uncompressed data before compressing for writing to a table. 
You can also specify this setting in the global settings (see [max_compress_block_size](../../../operations/settings/settings.md#max-compress-block-size) setting). The value specified when table is created overrides the global value for this setting. + +#### min_compress_block_size + +`min_compress_block_size` — Minimum size of blocks of uncompressed data required for compression when writing the next mark. You can also specify this setting in the global settings (see [min_compress_block_size](../../../operations/settings/settings.md#min-compress-block-size) setting). The value specified when table is created overrides the global value for this setting. + +#### max_partitions_to_read + +`max_partitions_to_read` — Limits the maximum number of partitions that can be accessed in one query. You can also specify setting [max_partitions_to_read](../../../operations/settings/merge-tree-settings.md#max-partitions-to-read) in the global setting. **Example of Sections Setting** @@ -310,17 +369,17 @@ SELECT count() FROM table WHERE s < 'z' SELECT count() FROM table WHERE u64 * i32 == 10 AND u64 * length(s) >= 1234 ``` -#### Available Types of Indices {#available-types-of-indices} +### Available Types of Indices {#available-types-of-indices} -- `minmax` +#### `minmax` Stores extremes of the specified expression (if the expression is `tuple`, then it stores extremes for each element of `tuple`), uses stored info for skipping blocks of data like the primary key. -- `set(max_rows)` +#### `set(max_rows)` Stores unique values of the specified expression (no more than `max_rows` rows, `max_rows=0` means “no limits”). Uses the values to check if the `WHERE` expression is not satisfiable on a block of data. 
-- `ngrambf_v1(n, size_of_bloom_filter_in_bytes, number_of_hash_functions, random_seed)` +#### `ngrambf_v1(n, size_of_bloom_filter_in_bytes, number_of_hash_functions, random_seed)` Stores a [Bloom filter](https://en.wikipedia.org/wiki/Bloom_filter) that contains all ngrams from a block of data. Works only with datatypes: [String](../../../sql-reference/data-types/string.md), [FixedString](../../../sql-reference/data-types/fixedstring.md) and [Map](../../../sql-reference/data-types/map.md). Can be used for optimization of `EQUALS`, `LIKE` and `IN` expressions. @@ -329,11 +388,11 @@ SELECT count() FROM table WHERE u64 * i32 == 10 AND u64 * length(s) >= 1234 - `number_of_hash_functions` — The number of hash functions used in the Bloom filter. - `random_seed` — The seed for Bloom filter hash functions. -- `tokenbf_v1(size_of_bloom_filter_in_bytes, number_of_hash_functions, random_seed)` +#### `tokenbf_v1(size_of_bloom_filter_in_bytes, number_of_hash_functions, random_seed)` The same as `ngrambf_v1`, but stores tokens instead of ngrams. Tokens are sequences separated by non-alphanumeric characters. -- `bloom_filter([false_positive])` — Stores a [Bloom filter](https://en.wikipedia.org/wiki/Bloom_filter) for the specified columns. +#### `bloom_filter([false_positive])` — Stores a [Bloom filter](https://en.wikipedia.org/wiki/Bloom_filter) for the specified columns. The optional `false_positive` parameter is the probability of receiving a false positive response from the filter. Possible values: (0, 1). Default value: 0.025. @@ -357,7 +416,7 @@ INDEX sample_index2 (u64 * length(str), i32 + f64 * 100, date, str) TYPE set(100 INDEX sample_index3 (lower(str), str) TYPE ngrambf_v1(3, 256, 2, 0) GRANULARITY 4 ``` -#### Functions Support {#functions-support} +### Functions Support {#functions-support} Conditions in the `WHERE` clause contains calls of the functions that operate with columns. 
If the column is a part of an index, ClickHouse tries to use this index when performing the functions. ClickHouse supports different subsets of functions for using indexes. @@ -466,7 +525,7 @@ The `TTL` clause can’t be used for key columns. **Examples** -Creating a table with `TTL`: +#### Creating a table with `TTL`: ``` sql CREATE TABLE example_table @@ -481,7 +540,7 @@ PARTITION BY toYYYYMM(d) ORDER BY d; ``` -Adding TTL to a column of an existing table +#### Adding TTL to a column of an existing table ``` sql ALTER TABLE example_table @@ -489,7 +548,7 @@ ALTER TABLE example_table c String TTL d + INTERVAL 1 DAY; ``` -Altering TTL of the column +#### Altering TTL of the column ``` sql ALTER TABLE example_table @@ -524,7 +583,7 @@ If a column is not part of the `GROUP BY` expression and is not set explicitly i **Examples** -Creating a table with `TTL`: +#### Creating a table with `TTL`: ``` sql CREATE TABLE example_table @@ -540,7 +599,7 @@ TTL d + INTERVAL 1 MONTH [DELETE], d + INTERVAL 2 WEEK TO DISK 'bbb'; ``` -Altering `TTL` of the table: +#### Altering `TTL` of the table: ``` sql ALTER TABLE example_table @@ -561,7 +620,7 @@ ORDER BY d TTL d + INTERVAL 1 MONTH DELETE WHERE toDayOfWeek(d) = 1; ``` -Creating a table, where expired rows are recompressed: +#### Creating a table, where expired rows are recompressed: ```sql CREATE TABLE table_for_recompression diff --git a/docs/en/engines/table-engines/mergetree-family/replacingmergetree.md b/docs/en/engines/table-engines/mergetree-family/replacingmergetree.md index 5586d108ead..daa507cab66 100644 --- a/docs/en/engines/table-engines/mergetree-family/replacingmergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/replacingmergetree.md @@ -33,16 +33,18 @@ For a description of request parameters, see [statement description](../../../sq Uniqueness of rows is determined by the `ORDER BY` table section, not `PRIMARY KEY`. 
::: -**ReplacingMergeTree Parameters** +## ReplacingMergeTree Parameters -- `ver` — column with the version number. Type `UInt*`, `Date`, `DateTime` or `DateTime64`. Optional parameter. +### ver + +`ver` — column with the version number. Type `UInt*`, `Date`, `DateTime` or `DateTime64`. Optional parameter. When merging, `ReplacingMergeTree` from all the rows with the same sorting key leaves only one: - The last in the selection, if `ver` not set. A selection is a set of rows in a set of parts participating in the merge. The most recently created part (the last insert) will be the last one in the selection. Thus, after deduplication, the very last row from the most recent insert will remain for each unique sorting key. - With the maximum version, if `ver` specified. If `ver` is the same for several rows, then it will use "if `ver` is not specified" rule for them, i.e. the most recent inserted row will remain. -**Query clauses** +## Query clauses When creating a `ReplacingMergeTree` table the same [clauses](../../../engines/table-engines/mergetree-family/mergetree.md) are required, as when creating a `MergeTree` table. diff --git a/docs/en/engines/table-engines/mergetree-family/replication.md b/docs/en/engines/table-engines/mergetree-family/replication.md index 3562bdf6d3a..0dfcdccb029 100644 --- a/docs/en/engines/table-engines/mergetree-family/replication.md +++ b/docs/en/engines/table-engines/mergetree-family/replication.md @@ -120,11 +120,19 @@ The system monitors data synchronicity on replicas and is able to recover after The `Replicated` prefix is added to the table engine name. For example:`ReplicatedMergeTree`. -**Replicated\*MergeTree parameters** +### Replicated\*MergeTree parameters -- `zoo_path` — The path to the table in ClickHouse Keeper. -- `replica_name` — The replica name in ClickHouse Keeper. -- `other_parameters` — Parameters of an engine which is used for creating the replicated version, for example, version in `ReplacingMergeTree`. 
+#### zoo_path + +`zoo_path` — The path to the table in ClickHouse Keeper. + +#### replica_name + +`replica_name` — The replica name in ClickHouse Keeper. + +#### other_parameters + +`other_parameters` — Parameters of an engine which is used for creating the replicated version, for example, version in `ReplacingMergeTree`. Example: diff --git a/docs/en/engines/table-engines/mergetree-family/summingmergetree.md b/docs/en/engines/table-engines/mergetree-family/summingmergetree.md index b532aef1980..7afa7cf028e 100644 --- a/docs/en/engines/table-engines/mergetree-family/summingmergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/summingmergetree.md @@ -26,14 +26,16 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] For a description of request parameters, see [request description](../../../sql-reference/statements/create/table.md). -**Parameters of SummingMergeTree** +### Parameters of SummingMergeTree -- `columns` - a tuple with the names of columns where values will be summarized. Optional parameter. +#### columns + +`columns` - a tuple with the names of columns where values will be summarized. Optional parameter. The columns must be of a numeric type and must not be in the primary key. If `columns` not specified, ClickHouse summarizes the values in all columns with a numeric data type that are not in the primary key. -**Query clauses** +### Query clauses When creating a `SummingMergeTree` table the same [clauses](../../../engines/table-engines/mergetree-family/mergetree.md) are required, as when creating a `MergeTree` table. 
diff --git a/docs/en/engines/table-engines/mergetree-family/versionedcollapsingmergetree.md b/docs/en/engines/table-engines/mergetree-family/versionedcollapsingmergetree.md index c1fe5dfffdf..5642602f4a1 100644 --- a/docs/en/engines/table-engines/mergetree-family/versionedcollapsingmergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/versionedcollapsingmergetree.md @@ -31,21 +31,25 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] For a description of query parameters, see the [query description](../../../sql-reference/statements/create/table.md). -**Engine Parameters** +### Engine Parameters ``` sql VersionedCollapsingMergeTree(sign, version) ``` -- `sign` — Name of the column with the type of row: `1` is a “state” row, `-1` is a “cancel” row. +#### sign + +`sign` — Name of the column with the type of row: `1` is a “state” row, `-1` is a “cancel” row. The column data type should be `Int8`. -- `version` — Name of the column with the version of the object state. +#### version + +`version` — Name of the column with the version of the object state. The column data type should be `UInt*`. -**Query Clauses** +### Query Clauses When creating a `VersionedCollapsingMergeTree` table, the same [clauses](../../../engines/table-engines/mergetree-family/mergetree.md) are required as when creating a `MergeTree` table. 
From 011d58d7a0d13b6a261b29d4c91490034821586b Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 24 Jun 2022 17:19:59 +0200 Subject: [PATCH 070/101] Simplify more --- src/Storages/MergeTree/MergeTreeData.cpp | 71 ++++--------------- src/Storages/MergeTree/MergeTreeData.h | 20 +++--- src/Storages/MergeTree/MergeTreeSink.cpp | 43 +++++++++-- .../MergeTree/ReplicatedMergeTreeSink.cpp | 3 +- src/Storages/StorageMergeTree.cpp | 45 +++++++++--- src/Storages/StorageMergeTree.h | 2 + src/Storages/StorageReplicatedMergeTree.cpp | 13 ++-- 7 files changed, 108 insertions(+), 89 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index ad99b60d4ea..fdba84048f0 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -96,7 +96,6 @@ namespace ProfileEvents extern const Event RejectedInserts; extern const Event DelayedInserts; extern const Event DelayedInsertsMilliseconds; - extern const Event DuplicatedInsertedBlocks; extern const Event InsertedWideParts; extern const Event InsertedCompactParts; extern const Event InsertedInMemoryParts; @@ -2787,42 +2786,13 @@ MergeTreeData::DataPartsVector MergeTreeData::getActivePartsToReplace( bool MergeTreeData::renameTempPartAndAdd( MutableDataPartPtr & part, Transaction & out_transaction, - SimpleIncrement * increment, - MergeTreeDeduplicationLog * deduplication_log, - std::string_view deduplication_token) + DataPartsLock & lock) { DataPartsVector covered_parts; - { - auto lock = lockParts(); - /** It is important that obtaining new block number and adding that block to parts set is done atomically. - * Otherwise there is race condition - merge of blocks could happen in interval that doesn't yet contain new part. 
- */ - if (increment) - { - part->info.min_block = part->info.max_block = increment->get(); - part->info.mutation = 0; - part->name = part->getNewName(part->info); - } - /// Deduplication log used only from non-replicated MergeTree. Replicated - /// tables have their own mechanism. We try to deduplicate at such deep - /// level, because only here we know real part name which is required for - /// deduplication. - if (deduplication_log) - { - const String block_id = part->getZeroLevelPartBlockID(deduplication_token); - auto res = deduplication_log->addPart(block_id, part->info); - if (!res.second) - { - ProfileEvents::increment(ProfileEvents::DuplicatedInsertedBlocks); - LOG_INFO(log, "Block with ID {} already exists as part {}; ignoring it", block_id, res.first.getPartName()); - return false; - } - } + if (!renameTempPartAndReplaceImpl(part, out_transaction, lock, &covered_parts)) + return false; - if (!renameTempPartAndReplaceImpl(part, out_transaction, lock, &covered_parts)) - return false; - } if (!covered_parts.empty()) throw Exception("Added part " + part->name + " covers " + toString(covered_parts.size()) + " existing part(s) (including " + covered_parts[0]->name + ")", ErrorCodes::LOGICAL_ERROR); @@ -2904,34 +2874,23 @@ bool MergeTreeData::renameTempPartAndReplaceImpl( return true; } +MergeTreeData::DataPartsVector MergeTreeData::renameTempPartAndReplaceUnlocked( + MutableDataPartPtr & part, + Transaction & out_transaction, + DataPartsLock & lock) +{ + DataPartsVector covered_parts; + renameTempPartAndReplaceImpl(part, out_transaction, lock, &covered_parts); + + return covered_parts; +} + MergeTreeData::DataPartsVector MergeTreeData::renameTempPartAndReplace( MutableDataPartPtr & part, Transaction & out_transaction) { auto part_lock = lockParts(); - - DataPartsVector covered_parts; - renameTempPartAndReplaceImpl(part, out_transaction, part_lock, &covered_parts); - - return covered_parts; -} - -void MergeTreeData::renameTempPartsAndReplace( - 
MutableDataPartsVector & parts, - Transaction & out_transaction, - DataPartsLock & lock, - SimpleIncrement * increment) -{ - for (auto & part : parts) - { - if (increment) - { - part->info.min_block = part->info.max_block = increment->get(); - part->info.mutation = 0; - part->name = part->getNewName(part->info); - } - renameTempPartAndReplaceImpl(part, out_transaction, lock, nullptr); - } + return renameTempPartAndReplaceUnlocked(part, out_transaction, part_lock); } void MergeTreeData::removePartsFromWorkingSet(MergeTreeTransaction * txn, const MergeTreeData::DataPartsVector & remove, bool clear_without_timeout, DataPartsLock & acquired_lock) diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index 851504c38f0..adc9c78d516 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -557,23 +557,20 @@ public: bool renameTempPartAndAdd( MutableDataPartPtr & part, Transaction & transaction, - SimpleIncrement * increment = nullptr, - MergeTreeDeduplicationLog * deduplication_log = nullptr, - std::string_view deduplication_token = std::string_view()); + DataPartsLock & lock); /// The same as renameTempPartAndAdd but the block range of the part can contain existing parts. /// Returns all parts covered by the added part (in ascending order). /// If out_transaction == nullptr, marks covered parts as Outdated. + DataPartsVector renameTempPartAndReplaceUnlocked( + MutableDataPartPtr & part, + Transaction & out_transaction, + DataPartsLock & lock); + DataPartsVector renameTempPartAndReplace( MutableDataPartPtr & part, Transaction & out_transaction); - void renameTempPartsAndReplace( - MutableDataPartsVector & parts, - Transaction & out_transaction, - DataPartsLock & lock, - SimpleIncrement * increment = nullptr); - /// Remove parts from working set immediately (without wait for background /// process). Transfer part state to temporary. 
Have very limited usage only /// for new parts which aren't already present in table. @@ -1245,11 +1242,14 @@ protected: private: + /// Checking that candidate part doesn't break invariants: correct partition and doesn't exist already void checkPartCanBeAddedToTable(MutableDataPartPtr & part, DataPartsLock & lock) const; + /// Preparing itself to be commited in memory: fill some fields inside part, add it to data_parts_indexes + /// in precommited state and to transasction void preparePartForCommit(MutableDataPartPtr & part, Transaction & out_transaction, bool need_rename); - /// Low-level version of previous one, doesn't lock mutex + /// Low-level method for preparing parts for commit. /// FIXME Merge MergeTreeTransaction and Transaction bool renameTempPartAndReplaceImpl( MutableDataPartPtr & part, diff --git a/src/Storages/MergeTree/MergeTreeSink.cpp b/src/Storages/MergeTree/MergeTreeSink.cpp index 52f6fc46d9c..2f860b34fd5 100644 --- a/src/Storages/MergeTree/MergeTreeSink.cpp +++ b/src/Storages/MergeTree/MergeTreeSink.cpp @@ -3,6 +3,10 @@ #include #include +namespace ProfileEvents +{ + extern const Event DuplicatedInsertedBlocks; +} namespace DB { @@ -133,11 +137,42 @@ void MergeTreeSink::finishDelayedChunk() auto & part = partition.temp_part.part; - MergeTreeData::Transaction transaction(storage, context->getCurrentTransaction().get()); - /// Part can be deduplicated, so increment counters and add to part log only if it's really added - if (storage.renameTempPartAndAdd(part, transaction, &storage.increment, storage.getDeduplicationLog(), partition.block_dedup_token)) + bool added = false; + + { + auto lock = storage.lockParts(); + storage.fillNewPartName(part, lock); + + auto * deduplication_log = storage.getDeduplicationLog(); + if (deduplication_log) + { + const String block_id = part->getZeroLevelPartBlockID(partition.block_dedup_token); + auto res = deduplication_log->addPart(block_id, part->info); + if (!res.second) + { + 
ProfileEvents::increment(ProfileEvents::DuplicatedInsertedBlocks); + LOG_INFO(storage.log, "Block with ID {} already exists as part {}; ignoring it", block_id, res.first.getPartName()); + } + else + { + MergeTreeData::Transaction transaction(storage, context->getCurrentTransaction().get()); + added = storage.renameTempPartAndAdd(part, transaction, lock); + transaction.commit(&lock); + + } + } + else + { + MergeTreeData::Transaction transaction(storage, context->getCurrentTransaction().get()); + added = storage.renameTempPartAndAdd(part, transaction, lock); + transaction.commit(&lock); + } + + } + + /// Part can be deduplicated, so increment counters and add to part log only if it's really added + if (added) { - transaction.commit(); PartLog::addNewPart(storage.getContext(), part, partition.elapsed_ns); storage.incrementInsertedPartsProfileEvent(part->getType()); diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp index d63c79b2006..07e21def184 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp @@ -476,7 +476,8 @@ void ReplicatedMergeTreeSink::commitPart( try { - renamed = storage.renameTempPartAndAdd(part, transaction); + auto lock = storage.lockParts(); + renamed = storage.renameTempPartAndAdd(part, transaction, lock); } catch (const Exception & e) { diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 9bb1db105c1..43ebd9d5e84 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -1542,9 +1542,13 @@ PartitionCommandsResultInfo StorageMergeTree::attachPartition( loaded_parts[i]->storeVersionMetadata(); String old_name = renamed_parts.old_and_new_names[i].old_name; - MergeTreeData::Transaction transaction(*this, local_context->getCurrentTransaction().get()); - renameTempPartAndAdd(loaded_parts[i], transaction, &increment); - transaction.commit(); + { + auto lock 
= lockParts(); + MergeTreeData::Transaction transaction(*this, local_context->getCurrentTransaction().get()); + fillNewPartName(loaded_parts[i], lock); + renameTempPartAndAdd(loaded_parts[i], transaction, lock); + transaction.commit(&lock); + } renamed_parts.old_and_new_names[i].old_name.clear(); @@ -1616,8 +1620,16 @@ void StorageMergeTree::replacePartitionFrom(const StoragePtr & source_table, con Transaction transaction(*this, local_context->getCurrentTransaction().get()); auto data_parts_lock = lockParts(); + + /** It is important that obtaining new block number and adding that block to parts set is done atomically. + * Otherwise there is race condition - merge of blocks could happen in interval that doesn't yet contain new part. + */ + for (auto part : dst_parts) + { + fillNewPartName(part, data_parts_lock); + renameTempPartAndReplaceUnlocked(part, transaction, data_parts_lock); + } /// Populate transaction - renameTempPartsAndReplace(dst_parts, transaction, data_parts_lock, &increment); transaction.commit(&data_parts_lock); /// If it is REPLACE (not ATTACH), remove all parts which max_block_number less then min_block_number of the first new block @@ -1690,13 +1702,15 @@ void StorageMergeTree::movePartitionToTable(const StoragePtr & dest_table, const auto src_data_parts_lock = lockParts(); auto dest_data_parts_lock = dest_table_storage->lockParts(); - std::mutex mutex; - DataPartsLock lock(mutex); + for (auto & part : dst_parts) + { + dest_table_storage->fillNewPartName(part, dest_data_parts_lock); + dest_table_storage->renameTempPartAndReplaceUnlocked(part, transaction, dest_data_parts_lock); + } - dest_table_storage->renameTempPartsAndReplace(dst_parts, transaction, lock, &dest_table_storage->increment); - removePartsFromWorkingSet(local_context->getCurrentTransaction().get(), src_parts, true, lock); - transaction.commit(&lock); + removePartsFromWorkingSet(local_context->getCurrentTransaction().get(), src_parts, true, src_data_parts_lock); + 
transaction.commit(&src_data_parts_lock); } clearOldPartsFromFilesystem(); @@ -1787,9 +1801,11 @@ void StorageMergeTree::attachRestoredParts(MutableDataPartsVector && parts) { for (auto part : parts) { + auto lock = lockParts(); MergeTreeData::Transaction transaction(*this, NO_TRANSACTION_RAW); - renameTempPartAndAdd(part, transaction, &increment); - transaction.commit(); + fillNewPartName(part, lock); + renameTempPartAndAdd(part, transaction, lock); + transaction.commit(&lock); } } @@ -1815,4 +1831,11 @@ std::unique_ptr StorageMergeTree::getDefaultSettings() const return std::make_unique(getContext()->getMergeTreeSettings()); } +void StorageMergeTree::fillNewPartName(MutableDataPartPtr & part, DataPartsLock &) +{ + part->info.min_block = part->info.max_block = increment.get(); + part->info.mutation = 0; + part->name = part->getNewName(part->info); +} + } diff --git a/src/Storages/StorageMergeTree.h b/src/Storages/StorageMergeTree.h index ccfe03d012a..582962551d0 100644 --- a/src/Storages/StorageMergeTree.h +++ b/src/Storages/StorageMergeTree.h @@ -251,6 +251,8 @@ private: /// return any ids. std::optional getIncompleteMutationsStatus(Int64 mutation_version, std::set * mutation_ids = nullptr) const; + void fillNewPartName(MutableDataPartPtr & part, DataPartsLock & lock); + void startBackgroundMovesIfNeeded() override; /// Attaches restored parts to the storage. 
diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 209beb0a5f6..af0b829bb89 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -6602,7 +6602,8 @@ void StorageReplicatedMergeTree::replacePartitionFrom( Transaction transaction(*this, NO_TRANSACTION_RAW); { auto data_parts_lock = lockParts(); - renameTempPartsAndReplace(dst_parts, transaction, data_parts_lock); + for (auto & part : dst_parts) + renameTempPartAndReplaceUnlocked(part, transaction, data_parts_lock); } for (size_t i = 0; i < dst_parts.size(); ++i) @@ -6835,10 +6836,8 @@ void StorageReplicatedMergeTree::movePartitionToTable(const StoragePtr & dest_ta auto src_data_parts_lock = lockParts(); auto dest_data_parts_lock = dest_table_storage->lockParts(); - std::mutex mutex; - DataPartsLock lock(mutex); - - renameTempPartsAndReplace(dst_parts, transaction, lock); + for (auto & part : dst_parts) + renameTempPartAndReplaceUnlocked(part, transaction, src_data_parts_lock); for (size_t i = 0; i < dst_parts.size(); ++i) dest_table_storage->lockSharedData(*dst_parts[i], false, hardlinked_files_for_parts[i]); @@ -6849,8 +6848,8 @@ void StorageReplicatedMergeTree::movePartitionToTable(const StoragePtr & dest_ta else zkutil::KeeperMultiException::check(code, ops, op_results); - parts_to_remove = removePartsInRangeFromWorkingSetAndGetPartsToRemoveFromZooKeeper(NO_TRANSACTION_RAW, drop_range, lock); - transaction.commit(&lock); + parts_to_remove = removePartsInRangeFromWorkingSetAndGetPartsToRemoveFromZooKeeper(NO_TRANSACTION_RAW, drop_range, src_data_parts_lock); + transaction.commit(&src_data_parts_lock); } PartLog::addNewParts(getContext(), dst_parts, watch.elapsed()); From f685cf2268a0fe28754201b0eb9e16c1346daf68 Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 24 Jun 2022 17:33:43 +0200 Subject: [PATCH 071/101] Fix comment --- src/Storages/MergeTree/MergeTreeData.h | 18 +++++++----------- 1 file 
changed, 7 insertions(+), 11 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index adc9c78d516..72d029942ef 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -549,10 +549,7 @@ public: /// Renames temporary part to a permanent part and adds it to the parts set. /// It is assumed that the part does not intersect with existing parts. - /// If increment != nullptr, part index is determining using increment. Otherwise part index remains unchanged. - /// If out_transaction != nullptr, adds the part in the PreActive state (the part will be added to the - /// active set later with out_transaction->commit()). - /// Else, commits the part immediately. + /// Adds the part in the PreActive state (the part will be added to the active set later with out_transaction->commit()). /// Returns true if part was added. Returns false if part is covered by bigger part. bool renameTempPartAndAdd( MutableDataPartPtr & part, @@ -561,16 +558,16 @@ public: /// The same as renameTempPartAndAdd but the block range of the part can contain existing parts. /// Returns all parts covered by the added part (in ascending order). - /// If out_transaction == nullptr, marks covered parts as Outdated. + DataPartsVector renameTempPartAndReplace( + MutableDataPartPtr & part, + Transaction & out_transaction); + + /// Unlocked version of previous one. Useful when added multiple parts with a single lock. DataPartsVector renameTempPartAndReplaceUnlocked( MutableDataPartPtr & part, Transaction & out_transaction, DataPartsLock & lock); - DataPartsVector renameTempPartAndReplace( - MutableDataPartPtr & part, - Transaction & out_transaction); - /// Remove parts from working set immediately (without wait for background /// process). Transfer part state to temporary. Have very limited usage only /// for new parts which aren't already present in table. 
@@ -1249,7 +1246,7 @@ private: /// in precommited state and to transasction void preparePartForCommit(MutableDataPartPtr & part, Transaction & out_transaction, bool need_rename); - /// Low-level method for preparing parts for commit. + /// Low-level method for preparing parts for commit (in-memory). /// FIXME Merge MergeTreeTransaction and Transaction bool renameTempPartAndReplaceImpl( MutableDataPartPtr & part, @@ -1257,7 +1254,6 @@ private: DataPartsLock & lock, DataPartsVector * out_covered_parts); - /// RAII Wrapper for atomic work with currently moving parts /// Acquire them in constructor and remove them in destructor /// Uses data.currently_moving_parts_mutex From d963f262f828a8c6b2502c0cb2e86d35cbbf438b Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 24 Jun 2022 17:43:18 +0200 Subject: [PATCH 072/101] Fix style --- src/Storages/MergeTree/MergeTreeData.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index 72d029942ef..4fd7dd7d3cf 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -1242,8 +1242,8 @@ private: /// Checking that candidate part doesn't break invariants: correct partition and doesn't exist already void checkPartCanBeAddedToTable(MutableDataPartPtr & part, DataPartsLock & lock) const; - /// Preparing itself to be commited in memory: fill some fields inside part, add it to data_parts_indexes - /// in precommited state and to transasction + /// Preparing itself to be committed in memory: fill some fields inside part, add it to data_parts_indexes + /// in precommitted state and to transasction void preparePartForCommit(MutableDataPartPtr & part, Transaction & out_transaction, bool need_rename); /// Low-level method for preparing parts for commit (in-memory). 
From 75cd941956b322ebda0156035ddaa063e59e588c Mon Sep 17 00:00:00 2001 From: DanRoscigno Date: Fri, 24 Jun 2022 11:23:02 -0400 Subject: [PATCH 073/101] move settings to H3 level --- .../mergetree-family/mergetree.md | 23 +++++++++++-------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/docs/en/engines/table-engines/mergetree-family/mergetree.md b/docs/en/engines/table-engines/mergetree-family/mergetree.md index 103272fb250..20d9a14b194 100644 --- a/docs/en/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md @@ -64,9 +64,9 @@ For a description of parameters, see the [CREATE query description](../../../sql `ENGINE` — Name and parameters of the engine. `ENGINE = MergeTree()`. The `MergeTree` engine does not have parameters. -#### +#### ORDER_BY -- `ORDER BY` — The sorting key. +`ORDER BY` — The sorting key. A tuple of column names or arbitrary expressions. Example: `ORDER BY (CounterID, EventDate)`. @@ -74,24 +74,27 @@ For a description of parameters, see the [CREATE query description](../../../sql Use the `ORDER BY tuple()` syntax, if you do not need sorting. See [Selecting the Primary Key](#selecting-the-primary-key). -#### +#### PARTITION BY -- `PARTITION BY` — The [partitioning key](../../../engines/table-engines/mergetree-family/custom-partitioning-key.md). Optional. In most cases you don't need partition key, and in most other cases you don't need partition key more granular than by months. Partitioning does not speed up queries (in contrast to the ORDER BY expression). You should never use too granular partitioning. Don't partition your data by client identifiers or names (instead make client identifier or name the first column in the ORDER BY expression). +`PARTITION BY` — The [partitioning key](../../../engines/table-engines/mergetree-family/custom-partitioning-key.md). Optional. 
In most cases you don't need partition key, and in most other cases you don't need partition key more granular than by months. Partitioning does not speed up queries (in contrast to the ORDER BY expression). You should never use too granular partitioning. Don't partition your data by client identifiers or names (instead make client identifier or name the first column in the ORDER BY expression). For partitioning by month, use the `toYYYYMM(date_column)` expression, where `date_column` is a column with a date of the type [Date](../../../sql-reference/data-types/date.md). The partition names here have the `"YYYYMM"` format. -#### -- `PRIMARY KEY` — The primary key if it [differs from the sorting key](#choosing-a-primary-key-that-differs-from-the-sorting-key). Optional. +#### PRIMARY KEY + +`PRIMARY KEY` — The primary key if it [differs from the sorting key](#choosing-a-primary-key-that-differs-from-the-sorting-key). Optional. By default the primary key is the same as the sorting key (which is specified by the `ORDER BY` clause). Thus in most cases it is unnecessary to specify a separate `PRIMARY KEY` clause. -#### -- `SAMPLE BY` — An expression for sampling. Optional. +#### SAMPLE BY + +`SAMPLE BY` — An expression for sampling. Optional. If a sampling expression is used, the primary key must contain it. The result of a sampling expression must be an unsigned integer. Example: `SAMPLE BY intHash32(UserID) ORDER BY (CounterID, EventDate, intHash32(UserID))`. -#### -- `TTL` — A list of rules specifying storage duration of rows and defining logic of automatic parts movement [between disks and volumes](#table_engine-mergetree-multiple-volumes). Optional. +#### TTL + +`TTL` — A list of rules specifying storage duration of rows and defining logic of automatic parts movement [between disks and volumes](#table_engine-mergetree-multiple-volumes). Optional. Expression must have one `Date` or `DateTime` column as a result. 
Example: `TTL date + INTERVAL 1 DAY` From 70de1afad71ab62e1511c444bf01a62b0e63f162 Mon Sep 17 00:00:00 2001 From: DanRoscigno Date: Fri, 24 Jun 2022 12:16:20 -0400 Subject: [PATCH 074/101] move settings to H3 level --- .../engines/table-engines/special/buffer.md | 41 +++++++++--- .../table-engines/special/distributed.md | 64 ++++++++++++++----- docs/en/engines/table-engines/special/join.md | 44 ++++++++++--- .../en/engines/table-engines/special/merge.md | 10 ++- 4 files changed, 119 insertions(+), 40 deletions(-) diff --git a/docs/en/engines/table-engines/special/buffer.md b/docs/en/engines/table-engines/special/buffer.md index 5f81bd76ae4..bcd7c390eb1 100644 --- a/docs/en/engines/table-engines/special/buffer.md +++ b/docs/en/engines/table-engines/special/buffer.md @@ -11,24 +11,45 @@ Buffers the data to write in RAM, periodically flushing it to another table. Dur Buffer(database, table, num_layers, min_time, max_time, min_rows, max_rows, min_bytes, max_bytes) ``` -Engine parameters: +### Engine parameters: -- `database` – Database name. You can use `currentDatabase()` or another constant expression that returns a string. -- `table` – Table to flush data to. -- `num_layers` – Parallelism layer. Physically, the table will be represented as `num_layers` of independent buffers. Recommended value: 16. -- `min_time`, `max_time`, `min_rows`, `max_rows`, `min_bytes`, and `max_bytes` – Conditions for flushing data from the buffer. +#### database -Optional engine parameters: +`database` – Database name. You can use `currentDatabase()` or another constant expression that returns a string. -- `flush_time`, `flush_rows`, `flush_bytes` – Conditions for flushing data from the buffer, that will happen only in background (omitted or zero means no `flush*` parameters). +#### table + +`table` – Table to flush data to. + +#### num_layers + +`num_layers` – Parallelism layer. Physically, the table will be represented as `num_layers` of independent buffers. Recommended value: 16. 
+ +#### min_time, max_time, min_rows, max_rows, min_bytes, and max_bytes + +Conditions for flushing data from the buffer. + +### Optional engine parameters: + +#### flush_time, flush_rows, and flush_bytes + +Conditions for flushing data from the buffer, that will happen only in background (omitted or zero means no `flush*` parameters). Data is flushed from the buffer and written to the destination table if all the `min*` conditions or at least one `max*` condition are met. Also, if at least one `flush*` condition are met flush initiated in background, this is different from `max*`, since `flush*` allows you to configure background flushes separately to avoid adding latency for `INSERT` (into `Buffer`) queries. -- `min_time`, `max_time`, `flush_time` – Condition for the time in seconds from the moment of the first write to the buffer. -- `min_rows`, `max_rows`, `flush_rows` – Condition for the number of rows in the buffer. -- `min_bytes`, `max_bytes`, `flush_bytes` – Condition for the number of bytes in the buffer. +#### min_time, max_time, and flush_time + +Condition for the time in seconds from the moment of the first write to the buffer. + +#### min_rows, max_rows, and flush_rows + +Condition for the number of rows in the buffer. + +#### min_bytes, max_bytes, and flush_bytes + +Condition for the number of bytes in the buffer. During the write operation, data is inserted to a `num_layers` number of random buffers. Or, if the data part to insert is large enough (greater than `max_rows` or `max_bytes`), it is written directly to the destination table, omitting the buffer. 
diff --git a/docs/en/engines/table-engines/special/distributed.md b/docs/en/engines/table-engines/special/distributed.md index fe3348c4d78..d643d4b3c68 100644 --- a/docs/en/engines/table-engines/special/distributed.md +++ b/docs/en/engines/table-engines/special/distributed.md @@ -27,42 +27,70 @@ When the `Distributed` table is pointing to a table on the current server you ca CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] AS [db2.]name2 ENGINE = Distributed(cluster, database, table[, sharding_key[, policy_name]]) [SETTINGS name=value, ...] ``` -**Distributed Parameters** +### Distributed Parameters -- `cluster` - the cluster name in the server’s config file +#### cluster -- `database` - the name of a remote database +`cluster` - the cluster name in the server’s config file -- `table` - the name of a remote table +#### database -- `sharding_key` - (optionally) sharding key +`database` - the name of a remote database -- `policy_name` - (optionally) policy name, it will be used to store temporary files for async send +#### table + +`table` - the name of a remote table + +#### sharding_key + +`sharding_key` - (optionally) sharding key + +#### policy_name + +`policy_name` - (optionally) policy name, it will be used to store temporary files for async send **See Also** - [insert_distributed_sync](../../../operations/settings/settings.md#insert_distributed_sync) setting - [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes) for the examples -**Distributed Settings** +### Distributed Settings -- `fsync_after_insert` - do the `fsync` for the file data after asynchronous insert to Distributed. Guarantees that the OS flushed the whole inserted data to a file **on the initiator node** disk. +#### fsync_after_insert -- `fsync_directories` - do the `fsync` for directories. 
Guarantees that the OS refreshed directory metadata after operations related to asynchronous inserts on Distributed table (after insert, after sending the data to shard, etc). +`fsync_after_insert` - do the `fsync` for the file data after asynchronous insert to Distributed. Guarantees that the OS flushed the whole inserted data to a file **on the initiator node** disk. -- `bytes_to_throw_insert` - if more than this number of compressed bytes will be pending for async INSERT, an exception will be thrown. 0 - do not throw. Default 0. +#### fsync_directories -- `bytes_to_delay_insert` - if more than this number of compressed bytes will be pending for async INSERT, the query will be delayed. 0 - do not delay. Default 0. +`fsync_directories` - do the `fsync` for directories. Guarantees that the OS refreshed directory metadata after operations related to asynchronous inserts on Distributed table (after insert, after sending the data to shard, etc). -- `max_delay_to_insert` - max delay of inserting data into Distributed table in seconds, if there are a lot of pending bytes for async send. Default 60. +#### bytes_to_throw_insert -- `monitor_batch_inserts` - same as [distributed_directory_monitor_batch_inserts](../../../operations/settings/settings.md#distributed_directory_monitor_batch_inserts) +`bytes_to_throw_insert` - if more than this number of compressed bytes will be pending for async INSERT, an exception will be thrown. 0 - do not throw. Default 0. -- `monitor_split_batch_on_failure` - same as [distributed_directory_monitor_split_batch_on_failure](../../../operations/settings/settings.md#distributed_directory_monitor_split_batch_on_failure) +#### bytes_to_delay_insert -- `monitor_sleep_time_ms` - same as [distributed_directory_monitor_sleep_time_ms](../../../operations/settings/settings.md#distributed_directory_monitor_sleep_time_ms) +`bytes_to_delay_insert` - if more than this number of compressed bytes will be pending for async INSERT, the query will be delayed. 
0 - do not delay. Default 0. -- `monitor_max_sleep_time_ms` - same as [distributed_directory_monitor_max_sleep_time_ms](../../../operations/settings/settings.md#distributed_directory_monitor_max_sleep_time_ms) +#### max_delay_to_insert + +`max_delay_to_insert` - max delay of inserting data into Distributed table in seconds, if there are a lot of pending bytes for async send. Default 60. + +#### monitor_batch_inserts + +`monitor_batch_inserts` - same as [distributed_directory_monitor_batch_inserts](../../../operations/settings/settings.md#distributed_directory_monitor_batch_inserts) + +#### monitor_split_batch_on_failure + +`monitor_split_batch_on_failure` - same as [distributed_directory_monitor_split_batch_on_failure](../../../operations/settings/settings.md#distributed_directory_monitor_split_batch_on_failure) + +#### monitor_sleep_time_ms + +`monitor_sleep_time_ms` - same as [distributed_directory_monitor_sleep_time_ms](../../../operations/settings/settings.md#distributed_directory_monitor_sleep_time_ms) + +#### monitor_max_sleep_time_ms + +`monitor_max_sleep_time_ms` - same as [distributed_directory_monitor_max_sleep_time_ms](../../../operations/settings/settings.md#distributed_directory_monitor_max_sleep_time_ms) :::note **Durability settings** (`fsync_...`): @@ -213,7 +241,9 @@ To learn more about how distibuted `in` and `global in` queries are processed, r ## Virtual Columns {#virtual-columns} -- `_shard_num` — Contains the `shard_num` value from the table `system.clusters`. Type: [UInt32](../../../sql-reference/data-types/int-uint.md). +#### _shard_num + +`_shard_num` — Contains the `shard_num` value from the table `system.clusters`. Type: [UInt32](../../../sql-reference/data-types/int-uint.md). :::note Since [remote](../../../sql-reference/table-functions/remote.md) and [cluster](../../../sql-reference/table-functions/cluster.md) table functions internally create temporary Distributed table, `_shard_num` is available there too. 
diff --git a/docs/en/engines/table-engines/special/join.md b/docs/en/engines/table-engines/special/join.md index bb9744103f7..c95ebe19c31 100644 --- a/docs/en/engines/table-engines/special/join.md +++ b/docs/en/engines/table-engines/special/join.md @@ -23,11 +23,19 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] See the detailed description of the [CREATE TABLE](../../../sql-reference/statements/create/table.md#create-table-query) query. -**Engine Parameters** +## Engine Parameters -- `join_strictness` – [JOIN strictness](../../../sql-reference/statements/select/join.md#select-join-types). -- `join_type` – [JOIN type](../../../sql-reference/statements/select/join.md#select-join-types). -- `k1[, k2, ...]` – Key columns from the `USING` clause that the `JOIN` operation is made with. +### join_strictness + +`join_strictness` – [JOIN strictness](../../../sql-reference/statements/select/join.md#select-join-types). + +### join_type + +`join_type` – [JOIN type](../../../sql-reference/statements/select/join.md#select-join-types). + +### Key columns + +`k1[, k2, ...]` – Key columns from the `USING` clause that the `JOIN` operation is made with. Enter `join_strictness` and `join_type` parameters without quotes, for example, `Join(ANY, LEFT, col1)`. They must match the `JOIN` operation that the table will be used for. If the parameters do not match, ClickHouse does not throw an exception and may return incorrect data. 
@@ -56,12 +64,28 @@ Main use-cases for `Join`-engine tables are following:
 
 When creating a table, the following settings are applied:
 
-- [join_use_nulls](../../../operations/settings/settings.md#join_use_nulls)
-- [max_rows_in_join](../../../operations/settings/query-complexity.md#settings-max_rows_in_join)
-- [max_bytes_in_join](../../../operations/settings/query-complexity.md#settings-max_bytes_in_join)
-- [join_overflow_mode](../../../operations/settings/query-complexity.md#settings-join_overflow_mode)
-- [join_any_take_last_row](../../../operations/settings/settings.md#settings-join_any_take_last_row)
-- [persistent](../../../operations/settings/settings.md#persistent)
+#### join_use_nulls
+
+[join_use_nulls](../../../operations/settings/settings.md#join_use_nulls)
+
+#### max_rows_in_join
+
+[max_rows_in_join](../../../operations/settings/query-complexity.md#settings-max_rows_in_join)
+
+#### max_bytes_in_join
+
+[max_bytes_in_join](../../../operations/settings/query-complexity.md#settings-max_bytes_in_join)
+
+#### join_overflow_mode
+
+[join_overflow_mode](../../../operations/settings/query-complexity.md#settings-join_overflow_mode)
+
+#### join_any_take_last_row
+
+[join_any_take_last_row](../../../operations/settings/settings.md#settings-join_any_take_last_row)
+#### persistent
+
+[persistent](../../../operations/settings/settings.md#persistent)
 
 The `Join`-engine tables can’t be used in `GLOBAL JOIN` operations.
 
diff --git a/docs/en/engines/table-engines/special/merge.md b/docs/en/engines/table-engines/special/merge.md
index 0f97acda8b5..ab15ad8dc76 100644
--- a/docs/en/engines/table-engines/special/merge.md
+++ b/docs/en/engines/table-engines/special/merge.md
@@ -15,14 +15,18 @@ Reading is automatically parallelized. Writing to a table is not supported. When
 CREATE TABLE ...
Engine=Merge(db_name, tables_regexp) ``` -**Engine Parameters** +## Engine Parameters -- `db_name` — Possible values: +### db_name + +`db_name` — Possible values: - database name, - constant expression that returns a string with a database name, for example, `currentDatabase()`, - `REGEXP(expression)`, where `expression` is a regular expression to match the DB names. -- `tables_regexp` — A regular expression to match the table names in the specified DB or DBs. +### tables_regexp + +`tables_regexp` — A regular expression to match the table names in the specified DB or DBs. Regular expressions — [re2](https://github.com/google/re2) (supports a subset of PCRE), case-sensitive. See the notes about escaping symbols in regular expressions in the "match" section. From 2bc4dc4a1d04db6055b85422e2c70bfb142f8f3a Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 24 Jun 2022 18:38:43 +0200 Subject: [PATCH 075/101] Fix accident change --- src/Storages/StorageReplicatedMergeTree.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index af0b829bb89..6895142b2a4 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -6837,7 +6837,7 @@ void StorageReplicatedMergeTree::movePartitionToTable(const StoragePtr & dest_ta auto dest_data_parts_lock = dest_table_storage->lockParts(); for (auto & part : dst_parts) - renameTempPartAndReplaceUnlocked(part, transaction, src_data_parts_lock); + dest_table_storage->renameTempPartAndReplaceUnlocked(part, transaction, dest_data_parts_lock); for (size_t i = 0; i < dst_parts.size(); ++i) dest_table_storage->lockSharedData(*dst_parts[i], false, hardlinked_files_for_parts[i]); From d52dc3201070a9dd2a57858d6c3772e6652cc8fe Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Fri, 24 Jun 2022 19:31:41 +0200 Subject: [PATCH 076/101] Update 
fetchPostgreSQLTableStructure.cpp --- src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.cpp b/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.cpp index 08a7e78d0e9..10cde43e9e1 100644 --- a/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.cpp +++ b/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.cpp @@ -300,10 +300,11 @@ PostgreSQLTableStructure fetchPostgreSQLTableStructure( "and a.attnum = ANY(ix.indkey) " "and t.relkind in ('r', 'p') " /// simple tables "and t.relname = {} " /// Connection is already done to a needed database, only table name is needed. - "and t.relnamespace = {} " + "{}" "and ix.indisreplident = 't' " /// index is is replica identity index - "ORDER BY a.attname", /// column names - quoteString(postgres_table), quoteString(postgres_schema.empty() ? "public" : postgres_schema)); + "ORDER BY a.attname", /// column name + (postgres_schema.empty() ? 
"" : "and t.relnamespace = " + quoteString(postgres_schema)) + " ", + quoteString(postgres_table)); table.replica_identity_columns = readNamesAndTypesList(tx, postgres_table_with_schema, query, use_nulls, true); } From a88ae9ca99dae0490c5076c07b71e1b447ad4734 Mon Sep 17 00:00:00 2001 From: kssenii Date: Fri, 24 Jun 2022 19:48:38 +0200 Subject: [PATCH 077/101] Review fix --- src/Databases/DatabaseFactory.cpp | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/src/Databases/DatabaseFactory.cpp b/src/Databases/DatabaseFactory.cpp index 82a7dff7125..5c7c1dedf9b 100644 --- a/src/Databases/DatabaseFactory.cpp +++ b/src/Databases/DatabaseFactory.cpp @@ -14,6 +14,7 @@ #include #include #include +#include #include #include "config_core.h" @@ -334,18 +335,25 @@ DatabasePtr DatabaseFactory::getImpl(const ASTCreateQuery & create, const String configuration.username = safeGetLiteralValue(engine_args[2], engine_name); configuration.password = safeGetLiteralValue(engine_args[3], engine_name); + bool is_deprecated_syntax = false; if (engine_args.size() >= 5) { auto arg_value = engine_args[4]->as()->value; if (arg_value.getType() == Field::Types::Which::String) + { configuration.schema = safeGetLiteralValue(engine_args[4], engine_name); + } else + { use_table_cache = safeGetLiteralValue(engine_args[4], engine_name); + LOG_WARNING(&Poco::Logger::get("DatabaseFactory"), "A deprecated syntax of PostgreSQL database engine is used"); + is_deprecated_syntax = true; + } } - } - if (engine_args.size() >= 6) - use_table_cache = safeGetLiteralValue(engine_args[5], engine_name); + if (!is_deprecated_syntax && engine_args.size() >= 6) + use_table_cache = safeGetLiteralValue(engine_args[5], engine_name); + } auto pool = std::make_shared(configuration, context->getSettingsRef().postgresql_connection_pool_size, From 8d3772948a5ec5977a9417e8019d94792c504fa2 Mon Sep 17 00:00:00 2001 From: DanRoscigno Date: Fri, 24 Jun 2022 15:31:42 -0400 Subject: [PATCH 078/101] move 
title to frontmatter --- docs/en/operations/access-rights.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/docs/en/operations/access-rights.md b/docs/en/operations/access-rights.md index 34d79aa99d9..a431f10fbad 100644 --- a/docs/en/operations/access-rights.md +++ b/docs/en/operations/access-rights.md @@ -1,10 +1,9 @@ --- sidebar_position: 48 sidebar_label: Access Control and Account Management +title: Access Control and Account Management --- -# Access Control and Account Management - ClickHouse supports access control management based on [RBAC](https://en.wikipedia.org/wiki/Role-based_access_control) approach. ClickHouse access entities: From 13a6254e1e7f4f302bd767472c105b7b57210c93 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 25 Jun 2022 01:45:38 +0200 Subject: [PATCH 079/101] Correct submodule --- .gitmodules | 6 +- contrib/base-x | 1 + contrib/base-x/.gitignore | 4 - contrib/base-x/.travis.yml | 36 - contrib/base-x/LICENSE | 21 - contrib/base-x/README.md | 97 - contrib/base-x/base_x.hh | 614 ------ contrib/base-x/tests/test.cc | 30 - contrib/base-x/tests/testcases/tests.cc | 359 ---- contrib/base-x/uinteger_t.hh | 2546 ----------------------- 10 files changed, 4 insertions(+), 3710 deletions(-) create mode 160000 contrib/base-x delete mode 100644 contrib/base-x/.gitignore delete mode 100755 contrib/base-x/.travis.yml delete mode 100644 contrib/base-x/LICENSE delete mode 100644 contrib/base-x/README.md delete mode 100644 contrib/base-x/base_x.hh delete mode 100644 contrib/base-x/tests/test.cc delete mode 100644 contrib/base-x/tests/testcases/tests.cc delete mode 100644 contrib/base-x/uinteger_t.hh diff --git a/.gitmodules b/.gitmodules index a8924e3aaba..f65806c1da5 100644 --- a/.gitmodules +++ b/.gitmodules @@ -268,9 +268,9 @@ [submodule "contrib/hashidsxx"] path = contrib/hashidsxx url = https://github.com/schoentoon/hashidsxx.git -[submodule "contrib/base-x"] - path = contrib/base-x - url = 
https://github.com/ClickHouse/base-x.git [submodule "contrib/liburing"] path = contrib/liburing url = https://github.com/axboe/liburing.git +[submodule "contrib/base-x"] + path = contrib/base-x + url = https://github.com/ClickHouse/base-x.git diff --git a/contrib/base-x b/contrib/base-x new file mode 160000 index 00000000000..a85f98fb4ed --- /dev/null +++ b/contrib/base-x @@ -0,0 +1 @@ +Subproject commit a85f98fb4ed52c2f4029a4b6ac1ef0bafdfc56f5 diff --git a/contrib/base-x/.gitignore b/contrib/base-x/.gitignore deleted file mode 100644 index b63b40c8b71..00000000000 --- a/contrib/base-x/.gitignore +++ /dev/null @@ -1,4 +0,0 @@ -.DS_Store -test -*.o -*.dSYM \ No newline at end of file diff --git a/contrib/base-x/.travis.yml b/contrib/base-x/.travis.yml deleted file mode 100755 index f55132e614f..00000000000 --- a/contrib/base-x/.travis.yml +++ /dev/null @@ -1,36 +0,0 @@ -sudo: false - -language: cpp - -compiler: - - clang - - gcc - -addons: - apt: - sources: - - ubuntu-toolchain-r-test - - llvm-toolchain-precise-3.8 - packages: - - g++-6 - - clang-3.8 - -install: - - if [ "$CXX" = "g++" ]; then export CXX="g++-6"; fi - - if [ "$CXX" == "clang++" ]; then export CXX="clang++-3.8"; fi - - sudo apt-get install -qq git cmake - -before_script: - # not much better than git submodules, but there was never a need/want for the repo in this repo - - cd .. 
- - git clone https://github.com/google/googletest.git - - cd googletest - - git reset --hard d62d6c6556d96dda924382547c54a4b3afedb22c - - cmake CMakeLists.txt - - make - - - cd ../base-x/tests - - make - -script: - - make run diff --git a/contrib/base-x/LICENSE b/contrib/base-x/LICENSE deleted file mode 100644 index f7b3408abac..00000000000 --- a/contrib/base-x/LICENSE +++ /dev/null @@ -1,21 +0,0 @@ -The MIT License (MIT) - -Copyright (c) 2017 German Mendez Bravo (Kronuz) @ german dot mb at gmail.com - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
diff --git a/contrib/base-x/README.md b/contrib/base-x/README.md deleted file mode 100644 index 5dc4a068043..00000000000 --- a/contrib/base-x/README.md +++ /dev/null @@ -1,97 +0,0 @@ -# base-x [![License][license-img]][license-url] [![GitHub Stars][stars-img]][stars-url] [![GitHub Forks][forks-img]][forks-url] [![GitHub Watchers][watchers-img]][watchers-url] [![Tweet][tweet-img]][tweet-url] - -[![Build Status](https://travis-ci.org/Kronuz/base-x.svg?branch=master)](https://travis-ci.org/Kronuz/base-x) - - -### BaseX encoder / decoder for C++ - -This is a fast base encoder / decoder of any given alphabet. - - -#### Example - -``` cpp -// example.cc -// g++ -std=c++14 -o example example.cc - -#include -#include "base_x.hh" - -int main() { - auto encoded = Base58::base58().encode("Hello world!"); - - std::cout << encoded << std::endl; - // => 1LDlk6QWOejX6rPrJ - - return 0; -} -``` - - -#### Compilation - -* g++ and clang++ are supported. -* C++14 is required. - - -### Alphabets - -See below for a list of commonly recognized alphabets, and their respective base. 
- -Base | Factory | Alphabet ------|---------------------|------------- - 2 | base2::base2() | `01` - 2 | base8::base8() | `01234567` - 11 | bas11::bas11() | `0123456789a` - 16 | base16::base16() | `0123456789abcdef` - 32 | base32::base32() | `0123456789ABCDEFGHJKMNPQRSTVWXYZ` - 36 | base36::base36() | `0123456789abcdefghijklmnopqrstuvwxyz` - 58 | base58::base58() | `123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz` - 58 | base58::bitcoin() | `123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz` - 58 | base58::gmp() | `0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuv` - 58 | base58::ripple() | `rpshnaf39wBUDNEGHJKLM4PQRST7VWXYZ2bcdeCg65jkm8oFqi1tuvAxyz` - 58 | base58::flickr() | `123456789abcdefghijkmnopqrstuvwxyzABCDEFGHJKLMNPQRSTUVWXYZ` - 62 | base62::base62() | `0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz` - 62 | base62::inverted() | `0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ` - 64 | base64::base64() | `ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/` - 64 | base64::urlsafe() | `ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_` - 66 | base66::base66() | `ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_.!~` - - -### How it works - -It encodes octet arrays by doing long divisions on all significant digits in the -array, creating a representation of that number in the new base. - -**If you need standard hex encoding, or base64 encoding, this module is NOT -appropriate.** - - -## Author -[**German Mendez Bravo (Kronuz)**](https://kronuz.io/) - -[![Follow on GitHub][github-follow-img]][github-follow-url] -[![Follow on Twitter][twitter-follow-img]][twitter-follow-url] - - -## License - -MIT License. See [LICENSE](LICENSE) for details. 
- -Copyright (c) 2017 German Mendez Bravo (Kronuz) @ german dot mb at gmail.com - - -[license-url]: https://github.com/Kronuz/base-x/blob/master/LICENSE -[license-img]: https://img.shields.io/github/license/Kronuz/base-x.svg -[stars-url]: https://github.com/Kronuz/base-x/stargazers -[stars-img]: https://img.shields.io/github/stars/Kronuz/base-x.svg?style=social&label=Stars -[forks-url]: https://github.com/Kronuz/base-x/network/members -[forks-img]: https://img.shields.io/github/forks/Kronuz/base-x.svg?style=social&label=Forks -[watchers-url]: https://github.com/Kronuz/base-x/watchers -[watchers-img]: https://img.shields.io/github/watchers/Kronuz/base-x.svg?style=social&label=Watchers -[tweet-img]: https://img.shields.io/twitter/url/https/github.com/Kronuz/base-x.svg?style=social -[tweet-url]: https://twitter.com/intent/tweet?text=Base-X+encoding%2Fdecoding+for+modern+C%2B%2B+by+%40germbravo:&url=https%3A%2F%2Fgithub.com%2FKronuz%2Fbase-x -[github-follow-url]: https://github.com/Kronuz -[github-follow-img]: https://img.shields.io/github/followers/Kronuz.svg?style=social&label=Follow -[twitter-follow-url]: https://twitter.com/intent/follow?screen_name=germbravo -[twitter-follow-img]: https://img.shields.io/twitter/follow/germbravo.svg?style=social&label=Follow diff --git a/contrib/base-x/base_x.hh b/contrib/base-x/base_x.hh deleted file mode 100644 index fdc06fead2f..00000000000 --- a/contrib/base-x/base_x.hh +++ /dev/null @@ -1,614 +0,0 @@ -/* -base_x.hh -BaseX encoder / decoder for C++ - -Copyright (c) 2017 German Mendez Bravo (Kronuz) @ german dot mb at gmail.com - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, 
subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#ifndef __BASE_X__H_ -#define __BASE_X__H_ - -#include // for std::find_if, std::reverse -#include // for std::invalid_argument -#include // for std::string -#include // for std::enable_if_t - -#include "uinteger_t.hh" - - -class BaseX { - char _chr[256]; - int _ord[256]; - - const int size; - const int alphabet_base; - const unsigned base_size; - const unsigned alphabet_base_bits; - const unsigned block_size; - const uinteger_t::digit alphabet_base_mask; - const unsigned padding_size; - const char padding; - const int flags; - - constexpr char chr(unsigned char ord) const { - return _chr[ord]; - } - - constexpr int ord(unsigned char chr) const { - return _ord[chr]; - } - -public: - static constexpr int ignore_case = (1 << 0); - static constexpr int with_checksum = (1 << 1); - static constexpr int with_check = (1 << 2); - static constexpr int block_padding = (1 << 3); - - template - constexpr BaseX(int flgs, const char (&alphabet)[alphabet_size1], const char (&extended)[extended_size1], const char (&padding_string)[padding_size1], const char (&translate)[translate_size1]) : - _chr(), - _ord(), - size(alphabet_size1 - 1 + extended_size1 - 1), - alphabet_base(alphabet_size1 - 1), - base_size(uinteger_t::base_size(alphabet_base)), - alphabet_base_bits(uinteger_t::base_bits(alphabet_base)), - block_size((flgs & BaseX::block_padding) ? 
alphabet_base_bits : 0), - alphabet_base_mask(alphabet_base - 1), - padding_size(padding_size1 - 1), - padding(padding_size ? padding_string[0] : '\0'), - flags(flgs) - { - for (int c = 0; c < 256; ++c) { - _chr[c] = 0; - _ord[c] = alphabet_base; - } - for (int cp = 0; cp < alphabet_base; ++cp) { - auto ch = alphabet[cp]; - _chr[cp] = ch; - ASSERT(_ord[(unsigned char)ch] == alphabet_base); // Duplicate character in the alphabet - _ord[(unsigned char)ch] = cp; - if (flags & BaseX::ignore_case) { - if (ch >= 'A' && ch <='Z') { - _ord[(unsigned char)ch - 'A' + 'a'] = cp; - } else if (ch >= 'a' && ch <='z') { - _ord[(unsigned char)ch - 'a' + 'A'] = cp; - } - } - } - for (std::size_t i = 0; i < extended_size1 - 1; ++i) { - auto ch = extended[i]; - auto cp = alphabet_base + i; - _chr[cp] = ch; - ASSERT(_ord[(unsigned char)ch] == alphabet_base); // Duplicate character in the extended alphabet - _ord[(unsigned char)ch] = cp; - if (flags & BaseX::ignore_case) { - if (ch >= 'A' && ch <='Z') { - _ord[(unsigned char)ch - 'A' + 'a'] = cp; - } else if (ch >= 'a' && ch <='z') { - _ord[(unsigned char)ch - 'a' + 'A'] = cp; - } - } - } - int cp = -1; - for (std::size_t i = 0; i < translate_size1 - 1; ++i) { - auto ch = translate[i]; - auto ncp = _ord[(unsigned char)ch]; - if (ncp >= alphabet_base) { - ASSERT(_ord[(unsigned char)ch] == alphabet_base); // Invalid translation character - _ord[(unsigned char)ch] = cp; - if (flags & BaseX::ignore_case) { - if (ch >= 'A' && ch <='Z') { - _ord[(unsigned char)ch - 'A' + 'a'] = cp; - } else if (ch >= 'a' && ch <='z') { - _ord[(unsigned char)ch - 'a' + 'A'] = cp; - } - } - } else { - cp = ncp; - } - } - } - - // Get string representation of value - template ::value>> - void encode(Result& result, const uinteger_t& input) const { - std::size_t bp = 0; - uinteger_t quotient; - if (block_size) { - bp = ((input.bits() + 7) & 0xf8) % block_size; - bp = bp ? 
(block_size - bp) % block_size : 0; - if (bp) { - quotient = input << bp; - } - } - const uinteger_t& num = bp ? quotient : input; - auto num_sz = num.size(); - if (num_sz) { - int sum = 0; - result.reserve(num_sz * base_size); - if (alphabet_base_bits) { - std::size_t shift = 0; - auto ptr = reinterpret_cast(num.data()); - uinteger_t::digit v = *ptr++; - v <<= uinteger_t::half_digit_bits; - for (auto i = num_sz * 2 - 1; i; --i) { - v >>= uinteger_t::half_digit_bits; - v |= (static_cast(*ptr++) << uinteger_t::half_digit_bits); - do { - auto d = static_cast((v >> shift) & alphabet_base_mask); - result.push_back(chr(d)); - shift += alphabet_base_bits; - sum += d; - } while (shift <= uinteger_t::half_digit_bits); - shift -= uinteger_t::half_digit_bits; - } - v >>= (shift + uinteger_t::half_digit_bits); - while (v) { - auto d = static_cast(v & alphabet_base_mask); - result.push_back(chr(d)); - v >>= alphabet_base_bits; - sum += d; - } - auto s = chr(0); - auto rit_f = std::find_if(result.rbegin(), result.rend(), [s](const char& c) { return c != s; }); - result.resize(result.rend() - rit_f); // shrink - } else { - uinteger_t uint_base = alphabet_base; - if (!bp) { - quotient = num; - } - do { - auto r = quotient.divmod(uint_base); - auto d = static_cast(r.second); - result.push_back(chr(d)); - quotient = std::move(r.first); - sum += d; - } while (quotient); - } - std::reverse(result.begin(), result.end()); - if (padding_size) { - Result p; - p.resize((padding_size - (result.size() % padding_size)) % padding_size, padding); - result.append(p); - } - if (flags & BaseX::with_check) { - auto chk = static_cast(num % size); - result.push_back(chr(chk)); - sum += chk; - } - if (flags & BaseX::with_checksum) { - auto sz = result.size(); - sz = (sz + sz / size) % size; - sum += sz; - sum = (size - sum % size) % size; - result.push_back(chr(sum)); - } - } else { - result.push_back(chr(0)); - } - } - - template ::value>> - Result encode(const uinteger_t& num) const { - Result 
result; - encode(result, num); - return result; - } - - template ::value>> - void encode(Result& result, const unsigned char* decoded, std::size_t decoded_size) const { - encode(result, uinteger_t(decoded, decoded_size, 256)); - } - - template ::value>> - Result encode(const unsigned char* decoded, std::size_t decoded_size) const { - Result result; - encode(result, uinteger_t(decoded, decoded_size, 256)); - return result; - } - - template ::value>> - void encode(Result& result, const char* decoded, std::size_t decoded_size) const { - encode(result, uinteger_t(decoded, decoded_size, 256)); - } - - template ::value>> - Result encode(const char* decoded, std::size_t decoded_size) const { - Result result; - encode(result, uinteger_t(decoded, decoded_size, 256)); - return result; - } - - template ::value>> - void encode(Result& result, T (&s)[N]) const { - encode(result, s, N - 1); - } - - template ::value>> - Result encode(T (&s)[N]) const { - Result result; - encode(result, s, N - 1); - return result; - } - - template ::value>> - void encode(Result& result, const std::string& binary) const { - return encode(result, binary.data(), binary.size()); - } - - template ::value>> - Result encode(const std::string& binary) const { - Result result; - encode(result, binary.data(), binary.size()); - return result; - } - - void decode(uinteger_t& result, const char* encoded, std::size_t encoded_size) const { - result = 0; - int sum = 0; - int sumsz = 0; - int direction = 1; - - auto sz = encoded_size; - if (flags & BaseX::with_checksum) --sz; - if (flags & BaseX::with_check) --sz; - - int bp = 0; - - if (alphabet_base_bits) { - for (; sz; --sz, encoded += direction) { - auto c = *encoded; - if (c == padding) break; - auto d = ord(static_cast(c)); - if (d < 0) continue; // ignored character - if (d >= alphabet_base) { - throw std::invalid_argument("Error: Invalid character: '" + std::string(1, c) + "' at " + std::to_string(encoded_size - sz)); - } - sum += d; - ++sumsz; - result = 
(result << alphabet_base_bits) | d; - bp += block_size; - } - } else { - uinteger_t uint_base = alphabet_base; - for (; sz; --sz, encoded += direction) { - auto c = *encoded; - if (c == padding) break; - auto d = ord(static_cast(c)); - if (d < 0) continue; // ignored character - if (d >= alphabet_base) { - throw std::invalid_argument("Error: Invalid character: '" + std::string(1, c) + "' at " + std::to_string(encoded_size - sz)); - } - sum += d; - ++sumsz; - result = (result * uint_base) + d; - bp += block_size; - } - } - - for (; sz && *encoded == padding; --sz, ++encoded); - - result >>= (bp & 7); - - if (flags & BaseX::with_check) { - auto c = *encoded; - auto d = ord(static_cast(c)); - if (d < 0 || d >= size) { - throw std::invalid_argument("Error: Invalid character: '" + std::string(1, c) + "' at " + std::to_string(encoded_size - sz)); - } - auto chk = static_cast(result % size); - if (d != chk) { - throw std::invalid_argument("Error: Invalid check"); - } - sum += chk; - ++sumsz; - ++encoded; - } - - if (flags & BaseX::with_checksum) { - auto c = *encoded; - auto d = ord(static_cast(c)); - if (d < 0 || d >= size) { - throw std::invalid_argument("Error: Invalid character: '" + std::string(1, c) + "' at " + std::to_string(encoded_size - sz)); - } - sum += d; - sum += (sumsz + sumsz / size) % size; - if (sum % size) { - throw std::invalid_argument("Error: Invalid checksum"); - } - } - } - - template ::value>> - void decode(Result& result, const char* encoded, std::size_t encoded_size) const { - uinteger_t num; - decode(num, encoded, encoded_size); - result = num.template str(256); - } - - template ::value or std::is_integral::value>> - Result decode(const char* encoded, std::size_t encoded_size) const { - Result result; - decode(result, encoded, encoded_size); - return result; - } - - template ::value or std::is_integral::value>> - void decode(Result& result, T (&s)[N]) const { - decode(result, s, N - 1); - } - - template ::value or std::is_integral::value>> - 
Result decode(T (&s)[N]) const { - Result result; - decode(result, s, N - 1); - return result; - } - - template ::value or std::is_integral::value>> - void decode(Result& result, const std::string& encoded) const { - decode(result, encoded.data(), encoded.size()); - } - - template ::value or std::is_integral::value>> - Result decode(const std::string& encoded) const { - Result result; - decode(result, encoded.data(), encoded.size()); - return result; - } - - bool is_valid(const char* encoded, std::size_t encoded_size) const { - int sum = 0; - int sumsz = 0; - if (flags & BaseX::with_checksum) --sumsz; - for (; encoded_size; --encoded_size, ++encoded) { - auto d = ord(static_cast(*encoded)); - if (d < 0) continue; // ignored character - if (d >= alphabet_base) { - return false; - } - sum += d; - ++sumsz; - } - if (flags & BaseX::with_checksum) { - sum += (sumsz + sumsz / size) % size; - if (sum % size) { - return false; - } - } - return true; - } - - template - bool is_valid(T (&s)[N]) const { - return is_valid(s, N - 1); - } - - bool is_valid(const std::string& encoded) const { - return is_valid(encoded.data(), encoded.size()); - } -}; - -// base2 -struct Base2 { - static const BaseX& base2() { - static constexpr BaseX encoder(0, "01", "", "", ""); - return encoder; - } - static const BaseX& base2chk() { - static constexpr BaseX encoder(BaseX::with_checksum, "01", "", "", ""); - return encoder; - } -}; - -// base8 -struct Base8 { - static const BaseX& base8() { - static constexpr BaseX encoder(0, "01234567", "", "", ""); - return encoder; - } - static const BaseX& base8chk() { - static constexpr BaseX encoder(BaseX::with_checksum, "01234567", "", "", ""); - return encoder; - } -}; - -// base11 -struct Base11 { - static const BaseX& base11() { - static constexpr BaseX encoder(BaseX::ignore_case, "0123456789a", "", "", ""); - return encoder; - } - static const BaseX& base11chk() { - static constexpr BaseX encoder(BaseX::ignore_case | BaseX::with_checksum, 
"0123456789a", "", "", ""); - return encoder; - } -}; - -// base16 -struct Base16 { - static const BaseX& base16() { - static constexpr BaseX encoder(BaseX::ignore_case, "0123456789abcdef", "", "", ""); - return encoder; - } - static const BaseX& base16chk() { - static constexpr BaseX encoder(BaseX::ignore_case | BaseX::with_checksum, "0123456789abcdef", "", "", ""); - return encoder; - } - static const BaseX& rfc4648() { - static constexpr BaseX encoder(0, "0123456789ABCDEF", "", "", ""); - return encoder; - } -}; - -// base32 -struct Base32 { - static const BaseX& base32() { - static constexpr BaseX encoder(BaseX::ignore_case, "0123456789abcdefghijklmnopqrstuv", "", "", ""); - return encoder; - } - static const BaseX& base32chk() { - static constexpr BaseX encoder(BaseX::ignore_case | BaseX::with_checksum, "0123456789abcdefghijklmnopqrstuv", "", "", ""); - return encoder; - } - static const BaseX& crockford() { - static constexpr BaseX encoder(BaseX::ignore_case, "0123456789ABCDEFGHJKMNPQRSTVWXYZ", "", "", "-0O1IL"); - return encoder; - } - static const BaseX& crockfordchk() { - static constexpr BaseX encoder(BaseX::ignore_case | BaseX::with_check, "0123456789ABCDEFGHJKMNPQRSTVWXYZ", "*~$=U", "", "-0O1IL"); - return encoder; - } - static const BaseX& rfc4648() { - static constexpr BaseX encoder(BaseX::block_padding, "ABCDEFGHIJKLMNOPQRSTUVWXYZ234567", "", "========", "\n\r"); - return encoder; - } - static const BaseX& rfc4648hex() { - static constexpr BaseX encoder(BaseX::block_padding, "0123456789ABCDEFGHIJKLMNOPQRSTUV", "", "========", "\n\r"); - return encoder; - } -}; - -// base36 -struct Base36 { - static const BaseX& base36() { - static constexpr BaseX encoder(BaseX::ignore_case, "0123456789abcdefghijklmnopqrstuvwxyz", "", "", ""); - return encoder; - } - static const BaseX& base36chk() { - static constexpr BaseX encoder(BaseX::ignore_case | BaseX::with_checksum, "0123456789abcdefghijklmnopqrstuvwxyz", "", "", ""); - return encoder; - } -}; - -// base58 
-struct Base58 { - static const BaseX& base58() { - static constexpr BaseX encoder(0, "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuv", "", "", ""); - return encoder; - } - static const BaseX& base58chk() { - static constexpr BaseX encoder(BaseX::with_checksum, "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuv", "", "", ""); - return encoder; - } - static const BaseX& bitcoin() { - static constexpr BaseX encoder(0, "123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz", "", "", ""); - return encoder; - } - static const BaseX& bitcoinchk() { - static constexpr BaseX encoder(BaseX::with_checksum, "123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz", "", "", ""); - return encoder; - } - static const BaseX& ripple() { - static constexpr BaseX encoder(0, "rpshnaf39wBUDNEGHJKLM4PQRST7VWXYZ2bcdeCg65jkm8oFqi1tuvAxyz", "", "", ""); - return encoder; - } - static const BaseX& ripplechk() { - static constexpr BaseX encoder(BaseX::with_checksum, "rpshnaf39wBUDNEGHJKLM4PQRST7VWXYZ2bcdeCg65jkm8oFqi1tuvAxyz", "", "", ""); - return encoder; - } - static const BaseX& flickr() { - static constexpr BaseX encoder(0, "123456789abcdefghijkmnopqrstuvwxyzABCDEFGHJKLMNPQRSTUVWXYZ", "", "", ""); - return encoder; - } - static const BaseX& flickrchk() { - static constexpr BaseX encoder(BaseX::with_checksum, "123456789abcdefghijkmnopqrstuvwxyzABCDEFGHJKLMNPQRSTUVWXYZ", "", "", ""); - return encoder; - } -}; - -// base59 -struct Base59 { - static const BaseX& base59() { - static constexpr BaseX encoder(0, "23456789abcdefghijklmnopqrstuvwxyzABCDEFGHJKLMNOPQRSTUVWXYZ", "", "", "l1IO0"); - return encoder; - } - static const BaseX& base59chk() { - static constexpr BaseX encoder(BaseX::with_checksum, "23456789abcdefghijklmnopqrstuvwxyzABCDEFGHJKLMNOPQRSTUVWXYZ", "", "", "l1IO0"); - return encoder; - } - static const BaseX& dubaluchk() { - static constexpr BaseX encoder(BaseX::with_checksum, "zy9MalDxwpKLdvW2AtmscgbYUq6jhP7E53TiXenZRkVCrouBH4GSQf8FNJO", "", "", 
"-l1IO0"); - return encoder; - } -}; - -// base62 -struct Base62 { - static const BaseX& base62() { - static constexpr BaseX encoder(0, "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz", "", "", ""); - return encoder; - } - static const BaseX& base62chk() { - static constexpr BaseX encoder(BaseX::with_checksum, "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz", "", "", ""); - return encoder; - } - static const BaseX& inverted() { - static constexpr BaseX encoder(0, "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", "", "", ""); - return encoder; - } - static const BaseX& invertedchk() { - static constexpr BaseX encoder(BaseX::with_checksum, "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", "", "", ""); - return encoder; - } -}; - -// base64 -struct Base64 { - static const BaseX& base64() { - static constexpr BaseX encoder(0, "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/", "", "", ""); - return encoder; - } - static const BaseX& base64chk() { - static constexpr BaseX encoder(BaseX::with_checksum, "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/", "", "", ""); - return encoder; - } - static const BaseX& url() { - static constexpr BaseX encoder(0, "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_", "", "", ""); - return encoder; - } - static const BaseX& urlchk() { - static constexpr BaseX encoder(BaseX::with_checksum, "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_", "", "", ""); - return encoder; - } - static const BaseX& rfc4648() { - static constexpr BaseX encoder(BaseX::block_padding, "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/", "", "====", "\n\r"); - return encoder; - } - static const BaseX& rfc4648url() { - static constexpr BaseX encoder(BaseX::block_padding, "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_", "", "====", "\n\r"); - return encoder; - } -}; - -// base66 -struct Base66 { - 
static const BaseX& base66() { - static constexpr BaseX encoder(0, "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_.!~", "", "", ""); - return encoder; - } - static const BaseX& base66chk() { - static constexpr BaseX encoder(BaseX::with_checksum, "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_.!~", "", "", ""); - return encoder; - } -}; - -#endif diff --git a/contrib/base-x/tests/test.cc b/contrib/base-x/tests/test.cc deleted file mode 100644 index d47d211173e..00000000000 --- a/contrib/base-x/tests/test.cc +++ /dev/null @@ -1,30 +0,0 @@ -/* -The MIT License (MIT) - -Copyright (c) 2017 German Mendez Bravo (Kronuz) @ german dot mb at gmail.com - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include - -int main(int argc, char * argv[]){ - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} \ No newline at end of file diff --git a/contrib/base-x/tests/testcases/tests.cc b/contrib/base-x/tests/testcases/tests.cc deleted file mode 100644 index c5bebfc8288..00000000000 --- a/contrib/base-x/tests/testcases/tests.cc +++ /dev/null @@ -1,359 +0,0 @@ -/* -The MIT License (MIT) - -Copyright (c) 2017 German Mendez Bravo (Kronuz) @ german dot mb at gmail.com - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include - -#include "base_x.hh" - - -static constexpr BaseX test_base2(0, "01", "", "", ""); -static constexpr BaseX test_base16(0, "0123456789abcdef", "", "", ""); -static constexpr BaseX test_base58(0, "123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz", "", "", ""); - - -TEST(UUID, Encode) { - EXPECT_EQ(Base62::base62().encode("\330\105\140\310\23\117\21\346\241\342\64\66\73\322\155\256"), "6a630O1jrtMjCrQDyG3D3O"); - EXPECT_EQ(Base58::bitcoin().encode("\330\105\140\310\23\117\21\346\241\342\64\66\73\322\155\256"), "ThxCy1Ek2q6UhWQhj9CK1o"); - EXPECT_EQ(Base58::base58().encode("\330\105\140\310\23\117\21\346\241\342\64\66\73\322\155\256"), "QetBu0Dh1m5ReTNeg8BI0k"); -} - -TEST(BaseX, checksums) { - EXPECT_EQ(Base64::base64().encode("Hello world!"), "SGVsbG8gd29ybGQh"); - EXPECT_EQ(Base64::base64chk().encode("Hello world!"), "SGVsbG8gd29ybGQhG"); - - EXPECT_EQ(Base64::base64().decode("SGVsbG8gd29ybGQh"), "Hello world!"); - EXPECT_EQ(Base64::base64chk().decode("SGVsbG8gd29ybGQhG"), "Hello world!"); - - EXPECT_EQ(Base62::base62().encode("Hello world!"), "T8dgcjRGuYUueWht"); - EXPECT_EQ(Base62::base62chk().encode("Hello world!"), "T8dgcjRGuYUueWhtE"); - - EXPECT_EQ(Base62::base62().decode("T8dgcjRGuYUueWht"), "Hello world!"); - EXPECT_EQ(Base62::base62chk().decode("T8dgcjRGuYUueWhtE"), "Hello world!"); - - EXPECT_EQ(Base62::base62chk().is_valid("T8dgcjRGuYUueWhtE"), true); - EXPECT_EQ(Base62::base62chk().is_valid("Some random text!"), false); -} - -TEST(base16, Encoder) { - EXPECT_EQ(Base16::base16().encode("A"), "41"); - EXPECT_EQ(Base16::base16().encode("AB"), "4142"); - EXPECT_EQ(Base16::base16().encode("ABC"), "414243"); - EXPECT_EQ(Base16::base16().encode("ABCD"), "41424344"); - EXPECT_EQ(Base16::base16().encode("ABCDE"), "4142434445"); - EXPECT_EQ(Base16::base16().encode("ABCDEF"), "414243444546"); - - EXPECT_EQ(Base16::rfc4648().encode("A"), "41"); - EXPECT_EQ(Base16::rfc4648().encode("AB"), "4142"); - 
EXPECT_EQ(Base16::rfc4648().encode("ABC"), "414243"); - EXPECT_EQ(Base16::rfc4648().encode("ABCD"), "41424344"); - EXPECT_EQ(Base16::rfc4648().encode("ABCDE"), "4142434445"); - EXPECT_EQ(Base16::rfc4648().encode("ABCDEF"), "414243444546"); -} - -TEST(base16, Decoder) { - EXPECT_EQ(Base16::base16().decode("41"), "A"); - EXPECT_EQ(Base16::base16().decode("4142"), "AB"); - EXPECT_EQ(Base16::base16().decode("414243"), "ABC"); - EXPECT_EQ(Base16::base16().decode("41424344"), "ABCD"); - EXPECT_EQ(Base16::base16().decode("4142434445"), "ABCDE"); - EXPECT_EQ(Base16::base16().decode("414243444546"), "ABCDEF"); - - EXPECT_EQ(Base16::rfc4648().decode("41"), "A"); - EXPECT_EQ(Base16::rfc4648().decode("4142"), "AB"); - EXPECT_EQ(Base16::rfc4648().decode("414243"), "ABC"); - EXPECT_EQ(Base16::rfc4648().decode("41424344"), "ABCD"); - EXPECT_EQ(Base16::rfc4648().decode("4142434445"), "ABCDE"); - EXPECT_EQ(Base16::rfc4648().decode("414243444546"), "ABCDEF"); -} - -TEST(base32, Encoder) { - // Note base64() encoding is NOT the same as the standard (rfc4648) - EXPECT_EQ(Base32::base32().encode("A"), "21"); - EXPECT_EQ(Base32::base32().encode("AB"), "ga2"); - EXPECT_EQ(Base32::base32().encode("ABC"), "42gi3"); - EXPECT_EQ(Base32::base32().encode("ABCD"), "10k4gq4"); - EXPECT_EQ(Base32::base32().encode("ABCDE"), "85146h25"); - EXPECT_EQ(Base32::base32().encode("ABCDEF"), "21891k8ha6"); - EXPECT_EQ(Base32::base32().encode("ABCDEFGHIJKLMNOPQRSTUVWXYZ"), "21891k8ha68t44iiib9h6ksjqga5956l2lapblgmaq"); - - EXPECT_EQ(Base32::rfc4648().encode("A"), "IE======"); - EXPECT_EQ(Base32::rfc4648().encode("AB"), "IFBA===="); - EXPECT_EQ(Base32::rfc4648().encode("ABC"), "IFBEG==="); - EXPECT_EQ(Base32::rfc4648().encode("ABCD"), "IFBEGRA="); - EXPECT_EQ(Base32::rfc4648().encode("ABCDE"), "IFBEGRCF"); - EXPECT_EQ(Base32::rfc4648().encode("ABCDEF"), "IFBEGRCFIY======"); - EXPECT_EQ(Base32::rfc4648().encode("ABCDEFGHIJKLMNOPQRSTUVWXYZ"), "IFBEGRCFIZDUQSKKJNGE2TSPKBIVEU2UKVLFOWCZLI======"); - - 
EXPECT_EQ(Base32::crockford().encode(519571), "FVCK"); - EXPECT_EQ(Base32::crockfordchk().encode(1234), "16JD"); - EXPECT_EQ(Base32::crockfordchk().encode("Hello World"), "28CNP6RVS0AXQQ4V348"); -} - -TEST(base32, Decoder) { - // Note base64() encoding is NOT the same as the standard (rfc4648) - EXPECT_EQ(Base32::base32().decode("21"), "A"); - EXPECT_EQ(Base32::base32().decode("ga2"), "AB"); - EXPECT_EQ(Base32::base32().decode("42gi3"), "ABC"); - EXPECT_EQ(Base32::base32().decode("10k4gq4"), "ABCD"); - EXPECT_EQ(Base32::base32().decode("85146h25"), "ABCDE"); - EXPECT_EQ(Base32::base32().decode("21891k8ha6"), "ABCDEF"); - EXPECT_EQ(Base32::base32().decode("21891k8ha68t44iiib9h6ksjqga5956l2lapblgmaq"), "ABCDEFGHIJKLMNOPQRSTUVWXYZ"); - - EXPECT_EQ(Base32::rfc4648().decode("IE======"), "A"); - EXPECT_EQ(Base32::rfc4648().decode("IFBA===="), "AB"); - EXPECT_EQ(Base32::rfc4648().decode("IFBEG==="), "ABC"); - EXPECT_EQ(Base32::rfc4648().decode("IFBEGRA="), "ABCD"); - EXPECT_EQ(Base32::rfc4648().decode("IFBEGRCF"), "ABCDE"); - EXPECT_EQ(Base32::rfc4648().decode("IFBEGRCFIY======"), "ABCDEF"); - EXPECT_EQ(Base32::rfc4648().decode("IFBEGRCFIZDUQSKKJNGE2TSPKBIVEU2UKVLFOWCZLI======"), "ABCDEFGHIJKLMNOPQRSTUVWXYZ"); - - EXPECT_EQ(Base32::crockford().decode("FVCK"), 519571); - EXPECT_EQ(Base32::crockfordchk().is_valid("16JD"), true); - EXPECT_EQ(Base32::crockfordchk().decode("16JD"), 1234); - - EXPECT_EQ(Base32::crockfordchk().decode("2-8cn-p6r-vso-axq-q4v-348"), "Hello World"); -} - -TEST(base58, Encoder) { - EXPECT_EQ(Base58::base58().decode("1TFvCj"), 987654321); - EXPECT_EQ(Base58::base58().encode(987654321), "1TFvCj"); - EXPECT_EQ(Base58::base58().encode("Hello world!"), "1LDlk6QWOejX6rPrJ"); - EXPECT_EQ(Base58::bitcoin().encode("Hello world!"), "2NEpo7TZRhna7vSvL"); -} - -TEST(base62, Encoder) { - EXPECT_EQ(Base62::base62().decode("14q60P"), 987654321); - EXPECT_EQ(Base62::base62().encode(987654321), "14q60P"); - EXPECT_EQ(Base62::base62().encode("Hello world!"), 
"T8dgcjRGuYUueWht"); - EXPECT_EQ(Base62::inverted().encode("Hello world!"), "t8DGCJrgUyuUEwHT"); -} - -TEST(base64, Encoder) { - // Note Base64 encoding is NOT the same as the standard (rfc4648) - EXPECT_EQ(Base64::base64().encode("A"), "BB"); - EXPECT_EQ(Base64::base64().encode("AB"), "EFC"); - EXPECT_EQ(Base64::base64().encode("ABC"), "QUJD"); - EXPECT_EQ(Base64::base64().encode("ABCD"), "BBQkNE"); - EXPECT_EQ(Base64::base64().encode("ABCDE"), "EFCQ0RF"); - EXPECT_EQ(Base64::base64().encode("ABCDEF"), "QUJDREVG"); - EXPECT_EQ(Base64::base64().encode("ABCDEFGHIJKLMNOPQRSTUVWXYZ"), "EFCQ0RFRkdISUpLTE1OT1BRUlNUVVZXWFla"); - - EXPECT_EQ(Base64::rfc4648().encode("A"), "QQ=="); - EXPECT_EQ(Base64::rfc4648().encode("AB"), "QUI="); - EXPECT_EQ(Base64::rfc4648().encode("ABC"), "QUJD"); - EXPECT_EQ(Base64::rfc4648().encode("ABCD"), "QUJDRA=="); - EXPECT_EQ(Base64::rfc4648().encode("ABCDE"), "QUJDREU="); - EXPECT_EQ(Base64::rfc4648().encode("ABCDEF"), "QUJDREVG"); - EXPECT_EQ(Base64::rfc4648().encode("ABCDEFGHIJKLMNOPQRSTUVWXYZ"), "QUJDREVGR0hJSktMTU5PUFFSU1RVVldYWVo="); -} - -TEST(base64, Decoder) { - // Note Base64 encoding is NOT the same as the standard (rfc4648) - EXPECT_EQ(Base64::base64().decode("BB"), "A"); - EXPECT_EQ(Base64::base64().decode("EFC"), "AB"); - EXPECT_EQ(Base64::base64().decode("QUJD"), "ABC"); - EXPECT_EQ(Base64::base64().decode("BBQkNE"), "ABCD"); - EXPECT_EQ(Base64::base64().decode("EFCQ0RF"), "ABCDE"); - EXPECT_EQ(Base64::base64().decode("QUJDREVG"), "ABCDEF"); - EXPECT_EQ(Base64::base64().decode("EFCQ0RFRkdISUpLTE1OT1BRUlNUVVZXWFla"), "ABCDEFGHIJKLMNOPQRSTUVWXYZ"); - - EXPECT_EQ(Base64::rfc4648().decode("QQ=="), "A"); - EXPECT_EQ(Base64::rfc4648().decode("QUI="), "AB"); - EXPECT_EQ(Base64::rfc4648().decode("QUJD"), "ABC"); - EXPECT_EQ(Base64::rfc4648().decode("QUJDRA=="), "ABCD"); - EXPECT_EQ(Base64::rfc4648().decode("QUJDREU="), "ABCDE"); - EXPECT_EQ(Base64::rfc4648().decode("QUJDREVG"), "ABCDEF"); - 
EXPECT_EQ(Base64::rfc4648().decode("QUJDREVG\nR0hJSktM\nTU5PUFFS\nU1RVVldY\nWVo="), "ABCDEFGHIJKLMNOPQRSTUVWXYZ"); -} - -TEST(base58, ShouldEncodeAndDecodeIntegers) { - auto data = 987654321; - - auto gmpEncoded = Base58::base58().encode(data); - auto bitcoinEncoded = Base58::bitcoin().encode(data); - auto rippleEncoded = Base58::ripple().encode(data); - auto flickrEncoded = Base58::flickr().encode(data); - - EXPECT_EQ(gmpEncoded, "1TFvCj"); - EXPECT_EQ(bitcoinEncoded, "2WGzDn"); - EXPECT_EQ(rippleEncoded, "pWGzD8"); - EXPECT_EQ(flickrEncoded, "2vgZdM"); - - auto gmpDecoded = Base58::base58().decode(gmpEncoded); - auto bitcoinDecoded = Base58::bitcoin().decode(bitcoinEncoded); - auto rippleDecoded = Base58::ripple().decode(rippleEncoded); - auto flickrDecoded = Base58::flickr().decode(flickrEncoded); - - EXPECT_EQ(gmpDecoded, data); - EXPECT_EQ(bitcoinDecoded, data); - EXPECT_EQ(rippleDecoded, data); - EXPECT_EQ(flickrDecoded, data); - - auto encoded = Base58::base58().encode(data); - auto decoded = Base58::base58().decode(encoded); - - EXPECT_EQ(decoded, data); -} - -TEST(base58, LongText) { - auto data = "Lorem ipsum dolor consectetur."; - - auto gmpEncoded = Base58::base58().encode(data); - auto bitcoinEncoded = Base58::bitcoin().encode(data); - auto rippleEncoded = Base58::ripple().encode(data); - auto flickrEncoded = Base58::flickr().encode(data); - - EXPECT_EQ(gmpEncoded, "FIHZQEpJ739QdqChX1PkgTBqP1FaDgJWQiGvY92YA"); - EXPECT_EQ(bitcoinEncoded, "GKJcTFtL84ATguDka2SojWCuS2GdEjLZTmHzbA3bB"); - EXPECT_EQ(rippleEncoded, "GKJcTEtL3hwTguDk2pSojWUuSpGdNjLZTmHzbwsbB"); - EXPECT_EQ(flickrEncoded, "gjiBsfTk84asFUdKz2rNJvcUr2gCeJkysLhZAa3Ab"); - - auto gmpDecoded = Base58::base58().decode(gmpEncoded); - auto bitcoinDecoded = Base58::bitcoin().decode(bitcoinEncoded); - auto rippleDecoded = Base58::ripple().decode(rippleEncoded); - auto flickrDecoded = Base58::flickr().decode(flickrEncoded); - - EXPECT_EQ(gmpDecoded, data); - EXPECT_EQ(bitcoinDecoded, data); - 
EXPECT_EQ(rippleDecoded, data); - EXPECT_EQ(flickrDecoded, data); -} - -TEST(base58, Tests) { - EXPECT_EQ(test_base2.encode(uinteger_t("000f", 16)), "1111"); - // EXPECT_EQ(test_base2.encode(uinteger_t("00ff", 16)), "011111111"); // ->> - EXPECT_EQ(test_base2.encode(uinteger_t("00ff", 16)), "11111111"); - EXPECT_EQ(test_base2.encode(uinteger_t("0fff", 16)), "111111111111"); - EXPECT_EQ(test_base2.encode(uinteger_t("ff00ff00", 16)), "11111111000000001111111100000000"); - // EXPECT_EQ(test_base16.encode(uinteger_t("0000000f", 16)), "000f"); // ->> - EXPECT_EQ(test_base16.encode(uinteger_t("0000000f", 16)), "f"); - // EXPECT_EQ(test_base16.encode(uinteger_t("000fff", 16)), "0fff"); // ->> - EXPECT_EQ(test_base16.encode(uinteger_t("000fff", 16)), "fff"); - EXPECT_EQ(test_base16.encode(uinteger_t("ffff", 16)), "ffff"); - // EXPECT_EQ(test_base58.encode(uinteger_t("", 16)), ""); // ->> - EXPECT_EQ(test_base58.encode(uinteger_t("", 16)), "1"); - EXPECT_EQ(test_base58.encode(uinteger_t("61", 16)), "2g"); - EXPECT_EQ(test_base58.encode(uinteger_t("626262", 16)), "a3gV"); - EXPECT_EQ(test_base58.encode(uinteger_t("636363", 16)), "aPEr"); - EXPECT_EQ(test_base58.encode(uinteger_t("73696d706c792061206c6f6e6720737472696e67", 16)), "2cFupjhnEsSn59qHXstmK2ffpLv2"); - // EXPECT_EQ(test_base58.encode(uinteger_t("00eb15231dfceb60925886b67d065299925915aeb172c06647", 16)), "1NS17iag9jJgTHD1VXjvLCEnZuQ3rJDE9L"); // ->> - EXPECT_EQ(test_base58.encode(uinteger_t("00eb15231dfceb60925886b67d065299925915aeb172c06647", 16)), "NS17iag9jJgTHD1VXjvLCEnZuQ3rJDE9L"); - EXPECT_EQ(test_base58.encode(uinteger_t("516b6fcd0f", 16)), "ABnLTmg"); - EXPECT_EQ(test_base58.encode(uinteger_t("bf4f89001e670274dd", 16)), "3SEo3LWLoPntC"); - EXPECT_EQ(test_base58.encode(uinteger_t("572e4794", 16)), "3EFU7m"); - EXPECT_EQ(test_base58.encode(uinteger_t("ecac89cad93923c02321", 16)), "EJDM8drfXA6uyA"); - EXPECT_EQ(test_base58.encode(uinteger_t("10c8511e", 16)), "Rt5zm"); - // 
EXPECT_EQ(test_base58.encode(uinteger_t("00000000000000000000", 16)), "1111111111"); // ->> - EXPECT_EQ(test_base58.encode(uinteger_t("00000000000000000000", 16)), "1"); - EXPECT_EQ(test_base58.encode(uinteger_t("801184cd2cdd640ca42cfc3a091c51d549b2f016d454b2774019c2b2d2e08529fd206ec97e", 16)), "5Hx15HFGyep2CfPxsJKe2fXJsCVn5DEiyoeGGF6JZjGbTRnqfiD"); - // EXPECT_EQ(test_base58.encode(uinteger_t("003c176e659bea0f29a3e9bf7880c112b1b31b4dc826268187", 16)), "16UjcYNBG9GTK4uq2f7yYEbuifqCzoLMGS"); // ->> - EXPECT_EQ(test_base58.encode(uinteger_t("003c176e659bea0f29a3e9bf7880c112b1b31b4dc826268187", 16)), "6UjcYNBG9GTK4uq2f7yYEbuifqCzoLMGS"); - EXPECT_EQ(test_base58.encode(uinteger_t("ffffffffffffffffffff", 16)), "FPBt6CHo3fovdL"); - EXPECT_EQ(test_base58.encode(uinteger_t("ffffffffffffffffffffffffff", 16)), "NKioeUVktgzXLJ1B3t"); - EXPECT_EQ(test_base58.encode(uinteger_t("ffffffffffffffffffffffffffffffff", 16)), "YcVfxkQb6JRzqk5kF2tNLv"); - EXPECT_EQ(test_base2.encode(uinteger_t("fb6f9ac3", 16)), "11111011011011111001101011000011"); - EXPECT_EQ(test_base2.encode(uinteger_t("179eea7a", 16)), "10111100111101110101001111010"); - EXPECT_EQ(test_base2.encode(uinteger_t("6db825db", 16)), "1101101101110000010010111011011"); - EXPECT_EQ(test_base2.encode(uinteger_t("93976aa7", 16)), "10010011100101110110101010100111"); - EXPECT_EQ(test_base58.encode(uinteger_t("ef41b9ce7e830af7", 16)), "h26E62FyLQN"); - EXPECT_EQ(test_base58.encode(uinteger_t("606cbc791036d2e9", 16)), "H8Sa62HVULG"); - EXPECT_EQ(test_base58.encode(uinteger_t("bdcb0ea69c2c8ec8", 16)), "YkESUPpnfoD"); - EXPECT_EQ(test_base58.encode(uinteger_t("1a2358ba67fb71d5", 16)), "5NaBN89ajtQ"); - EXPECT_EQ(test_base58.encode(uinteger_t("e6173f0f4d5fb5d7", 16)), "fVAoezT1ZkS"); - EXPECT_EQ(test_base58.encode(uinteger_t("91c81cbfdd58bbd2", 16)), "RPGNSU3bqTX"); - EXPECT_EQ(test_base58.encode(uinteger_t("329e0bf0e388dbfe", 16)), "9U41ZkwwysT"); - EXPECT_EQ(test_base58.encode(uinteger_t("30b10393210fa65b", 16)), "99NMW3WHjjY"); - 
EXPECT_EQ(test_base58.encode(uinteger_t("ab3bdd18e3623654", 16)), "VeBbqBb4rCT"); - EXPECT_EQ(test_base58.encode(uinteger_t("fe29d1751ec4af8a", 16)), "jWhmYLN9dUm"); - EXPECT_EQ(test_base58.encode(uinteger_t("c1273ab5488769807d", 16)), "3Tbh4kL3WKW6g"); - EXPECT_EQ(test_base58.encode(uinteger_t("6c7907904de934f852", 16)), "2P5jNYhfpTJxy"); - EXPECT_EQ(test_base58.encode(uinteger_t("05f0be055db47a0dc9", 16)), "5PN768Kr5oEp"); - EXPECT_EQ(test_base58.encode(uinteger_t("3511e6206829b35b12", 16)), "gBREojGaJ6DF"); - EXPECT_EQ(test_base58.encode(uinteger_t("d1c7c2ddc4a459d503", 16)), "3fsekq5Esq2KC"); - EXPECT_EQ(test_base58.encode(uinteger_t("1f88efd17ab073e9a1", 16)), "QHJbmW9ZY7jn"); - EXPECT_EQ(test_base58.encode(uinteger_t("0f45dadf4e64c5d5c2", 16)), "CGyVUMmCKLRf"); - EXPECT_EQ(test_base58.encode(uinteger_t("de1e5c5f718bb7fafa", 16)), "3pyy8U7w3KUa5"); - EXPECT_EQ(test_base58.encode(uinteger_t("123190b93e9a49a46c", 16)), "ES3DeFrG1zbd"); - EXPECT_EQ(test_base58.encode(uinteger_t("8bee94a543e7242e5a", 16)), "2nJnuWyLpGf6y"); - EXPECT_EQ(test_base58.encode(uinteger_t("9fd5f2285362f5cfd834", 16)), "9yqFhqeewcW3pF"); - EXPECT_EQ(test_base58.encode(uinteger_t("6987bac63ad23828bb31", 16)), "6vskE5Y1LhS3U4"); - EXPECT_EQ(test_base58.encode(uinteger_t("19d4a0f9d459cc2a08b0", 16)), "2TAsHPuaLhh5Aw"); - EXPECT_EQ(test_base58.encode(uinteger_t("a1e47ffdbea5a807ab26", 16)), "A6XzPgSUJDf1W5"); - EXPECT_EQ(test_base58.encode(uinteger_t("35c231e5b3a86a9b83db", 16)), "42B8reRwPAAoAa"); - EXPECT_EQ(test_base58.encode(uinteger_t("b2351012a48b8347c351", 16)), "B1hPyomGx4Vhqa"); - EXPECT_EQ(test_base58.encode(uinteger_t("71d402694dd9517ea653", 16)), "7Pv2SyAQx2Upu8"); - EXPECT_EQ(test_base58.encode(uinteger_t("55227c0ec7955c2bd6e8", 16)), "5nR64BkskyjHMq"); - EXPECT_EQ(test_base58.encode(uinteger_t("17b3d8ee7907c1be34df", 16)), "2LEg7TxosoxTGS"); - EXPECT_EQ(test_base58.encode(uinteger_t("7e7bba7b68bb8e95827f", 16)), "879o2ATGnmYyAW"); - 
EXPECT_EQ(test_base58.encode(uinteger_t("db9c13f5ba7654b01407fb", 16)), "wTYfxjDVbiks874"); - EXPECT_EQ(test_base58.encode(uinteger_t("6186449d20f5fd1e6c4393", 16)), "RBeiWhzZNL6VtMG"); - EXPECT_EQ(test_base58.encode(uinteger_t("5248751cebf4ad1c1a83c3", 16)), "MQSVNnc8ehFCqtW"); - EXPECT_EQ(test_base58.encode(uinteger_t("32090ef18cd479fc376a74", 16)), "DQdu351ExDaeYeX"); - EXPECT_EQ(test_base58.encode(uinteger_t("7cfa5d6ed1e467d986c426", 16)), "XzW67T5qfEnFcaZ"); - EXPECT_EQ(test_base58.encode(uinteger_t("9d8707723c7ede51103b6d", 16)), "g4eTCg6QJnB1UU4"); - EXPECT_EQ(test_base58.encode(uinteger_t("6f4d1e392d6a9b4ed8b223", 16)), "Ubo7kZY5aDpAJp2"); - EXPECT_EQ(test_base58.encode(uinteger_t("38057d98797cd39f80a0c9", 16)), "EtjQ2feamJvuqse"); - EXPECT_EQ(test_base58.encode(uinteger_t("de7e59903177e20880e915", 16)), "xB2N7yRBnDYEoT2"); - EXPECT_EQ(test_base58.encode(uinteger_t("b2ea24a28bc4a60b5c4b8d", 16)), "mNFMpJ2P3TGYqhv"); - EXPECT_EQ(test_base58.encode(uinteger_t("cf84938958589b6ffba6114d", 16)), "4v8ZbsGh2ePz5sipt"); - EXPECT_EQ(test_base58.encode(uinteger_t("dee13be7b8d8a08c94a3c02a", 16)), "5CwmE9jQqwtHkTF45"); - EXPECT_EQ(test_base58.encode(uinteger_t("14cb9c6b3f8cd2e02710f569", 16)), "Pm85JHVAAdeUdxtp"); - EXPECT_EQ(test_base58.encode(uinteger_t("ca3f2d558266bdcc44c79cb5", 16)), "4pMwomBAQHuUnoLUC"); - EXPECT_EQ(test_base58.encode(uinteger_t("c031215be44cbad745f38982", 16)), "4dMeTrcxiVw9RWvj3"); - EXPECT_EQ(test_base58.encode(uinteger_t("1435ab1dbc403111946270a5", 16)), "P7wX3sCWNrbqhBEC"); - EXPECT_EQ(test_base58.encode(uinteger_t("d8c6e4d775e7a66a0d0f9f41", 16)), "56GLoRDGWGuGJJwPN"); - EXPECT_EQ(test_base58.encode(uinteger_t("dcee35e74f0fd74176fce2f4", 16)), "5Ap1zyuYiJJFwWcMR"); - EXPECT_EQ(test_base58.encode(uinteger_t("bfcc0ca4b4855d1cf8993fc0", 16)), "4cvafQW4PEhARKv9D"); - EXPECT_EQ(test_base58.encode(uinteger_t("e02a3ac25ece7b54584b670a", 16)), "5EMM28xkpxZ1kkVUM"); - EXPECT_EQ(test_base58.encode(uinteger_t("fe4d938fc3719f064cabb4bfff", 16)), 
"NBXKkbHwrAsiWTLAk6"); - EXPECT_EQ(test_base58.encode(uinteger_t("9289cb4f6b15c57e6086b87ea5", 16)), "DCvDpjEXEbHjZqskKv"); - EXPECT_EQ(test_base58.encode(uinteger_t("fc266f35626b3612bfe978537b", 16)), "N186PVoBWrNre35BGE"); - EXPECT_EQ(test_base58.encode(uinteger_t("33ff08c06d92502bf258c07166", 16)), "5LC4SoW6jmTtbkbePw"); - EXPECT_EQ(test_base58.encode(uinteger_t("6a81cac1f3666bc59dc67b1c3c", 16)), "9sXgUySUzwiqDU5WHy"); - EXPECT_EQ(test_base58.encode(uinteger_t("9dfb8e7e744c544c0f323ea729", 16)), "EACsmGmkgcwsrPFzLg"); - EXPECT_EQ(test_base58.encode(uinteger_t("1e7a1e284f70838b38442b682b", 16)), "3YEVk9bE7rw5qExMkv"); - EXPECT_EQ(test_base58.encode(uinteger_t("2a862ad57901a8235f5dc74eaf", 16)), "4YS259nuTLfeXa5Wuc"); - EXPECT_EQ(test_base58.encode(uinteger_t("74c82096baef21f9d3089e5462", 16)), "AjAcKEhUfrqm8smvM7"); - EXPECT_EQ(test_base58.encode(uinteger_t("7a3edbc23d7b600263920261cc", 16)), "BBZXyRgey5S5DDZkcK"); - EXPECT_EQ(test_base58.encode(uinteger_t("20435664c357d25a9c8df751cf4f", 16)), "CrwNL6Fbv4pbRx1zd9g"); - EXPECT_EQ(test_base58.encode(uinteger_t("51a7aa87cf5cb1c12d045ec3422d", 16)), "X27NHGgKXmGzzQvDtpC"); - EXPECT_EQ(test_base58.encode(uinteger_t("344d2e116aa26f1062a2cb6ebbef", 16)), "LEDLDvL1Hg4qt1efVXt"); - EXPECT_EQ(test_base58.encode(uinteger_t("6941add7be4c0b5c7163e4928f8e", 16)), "fhMyN6gwoxE3uYraVzV"); - EXPECT_EQ(test_base58.encode(uinteger_t("10938fcbb7c4ab991649734a14bf", 16)), "76TPrSDxzGQfSzMu974"); - EXPECT_EQ(test_base58.encode(uinteger_t("eafe04d944ba504e9af9117b07de", 16)), "2VPgov563ryfe4L2Bj6M"); - EXPECT_EQ(test_base58.encode(uinteger_t("58d0aeed4d35da20b6f052127edf", 16)), "ZenZhXF9YwP8nQvNtNz"); - EXPECT_EQ(test_base58.encode(uinteger_t("d734984e2f5aecf25f7a3e353f8a", 16)), "2N7n3jFsTdyN49Faoq6h"); - EXPECT_EQ(test_base58.encode(uinteger_t("57d873fdb405b7daf4bafa62068a", 16)), "ZJ7NwoP4wHvwyZg3Wjs"); - EXPECT_EQ(test_base58.encode(uinteger_t("bda4ec7b40d0d65ca95dec4c4d3b", 16)), "2CijxjsNyvqTwPCfDcpA"); - 
EXPECT_EQ(test_base58.encode(uinteger_t("826c4abdceb1b91f0d4ad665f86d2e", 16)), "4edfvuDQu9KzVxLuXHfMo"); - EXPECT_EQ(test_base58.encode(uinteger_t("e7ecb35d07e65b960cb10574a4f51a", 16)), "7VLRYdB4cToipp2J2p3v9"); - EXPECT_EQ(test_base58.encode(uinteger_t("4f2d72ead87b31d6869fba39eac6dc", 16)), "3DUjqJRcfdWhpsrLrGcQs"); - EXPECT_EQ(test_base58.encode(uinteger_t("8b4f5788d60030950d5dfbf94c585d", 16)), "4u44JSRH5jP5X39YhPsmE"); - EXPECT_EQ(test_base58.encode(uinteger_t("ee4c0a0025d1a74ace9fe349355cc5", 16)), "7fgACjABRQUGUEpN6VBBA"); - EXPECT_EQ(test_base58.encode(uinteger_t("58ac05b9a0b4b66083ff1d489b8d84", 16)), "3UtJPyTwGXapcxHx8Rom5"); - EXPECT_EQ(test_base58.encode(uinteger_t("1aa35c05e1132e8e049aafaef035d8", 16)), "kE2eSU7gM2619pT82iGP"); - EXPECT_EQ(test_base58.encode(uinteger_t("771b0c28608484562a292e5d5d2b30", 16)), "4LGYeWhyfrjUByibUqdVR"); - EXPECT_EQ(test_base58.encode(uinteger_t("78ff9a0e56f9e88dc1cd654b40d019", 16)), "4PLggs66qAdbmZgkaPihe"); - EXPECT_EQ(test_base58.encode(uinteger_t("6d691bdd736346aa5a0a95b373b2ab", 16)), "44Y6qTgSvRMkdqpQ5ufkN"); -} diff --git a/contrib/base-x/uinteger_t.hh b/contrib/base-x/uinteger_t.hh deleted file mode 100644 index 901460f75c4..00000000000 --- a/contrib/base-x/uinteger_t.hh +++ /dev/null @@ -1,2546 +0,0 @@ -/* -uinteger_t.hh -An arbitrary precision unsigned integer type for C++ - -Copyright (c) 2017 German Mendez Bravo (Kronuz) @ german dot mb at gmail.com -Copyright (c) 2013 - 2017 Jason Lee @ calccrypto at gmail.com - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be 
included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. - -With much help from Auston Sterling - -Thanks to Stefan Deigmüller for finding -a bug in operator*. - -Thanks to François Dessenne for convincing me -to do a general rewrite of this class. - -Germán Mández Bravo (Kronuz) converted Jason Lee's uint128_t -to header-only and extended to arbitrary bit length. -*/ - -#ifndef __uint_t__ -#define __uint_t__ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define ASSERT assert - -// Compatibility inlines -#ifndef __has_builtin // Optional of course -#define __has_builtin(x) 0 // Compatibility with non-clang compilers -#endif - -#if defined _MSC_VER -# define HAVE___ADDCARRY_U64 -# define HAVE___SUBBORROW_U64 -# define HAVE___ADDCARRY_U32 -# define HAVE___SUBBORROW_U32 -# define HAVE___ADDCARRY_U16 -# define HAVE___SUBBORROW_U16 -# define HAVE___UMUL128 -# define HAVE___UMUL64 -# define HAVE___UMUL32 -# include -#endif - -#if (defined(__clang__) && __has_builtin(__builtin_clzll)) || (defined(__GNUC__ ) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 3))) -# define HAVE____BUILTIN_CLZLL -#endif -#if (defined(__clang__) && __has_builtin(__builtin_clzl)) || (defined(__GNUC__ ) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 3))) -# define HAVE____BUILTIN_CLZL -#endif -#if (defined(__clang__) && __has_builtin(__builtin_clz)) || (defined(__GNUC__ ) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 3))) -# define 
HAVE____BUILTIN_CLZ -#endif -#if (defined(__clang__) && __has_builtin(__builtin_addcll)) -# define HAVE____BUILTIN_ADDCLL -#endif -#if (defined(__clang__) && __has_builtin(__builtin_addcl)) -# define HAVE____BUILTIN_ADDCL -#endif -#if (defined(__clang__) && __has_builtin(__builtin_addc)) -# define HAVE____BUILTIN_ADDC -#endif -#if (defined(__clang__) && __has_builtin(__builtin_subcll)) -# define HAVE____BUILTIN_SUBCLL -#endif -#if (defined(__clang__) && __has_builtin(__builtin_subcl)) -# define HAVE____BUILTIN_SUBCL -#endif -#if (defined(__clang__) && __has_builtin(__builtin_subc)) -# define HAVE____BUILTIN_SUBC -#endif - -#if defined __SIZEOF_INT128__ -#define HAVE____INT128_T -#endif - - -#ifndef DIGIT_T -#define DIGIT_T std::uint64_t -#endif - -#ifndef HALF_DIGIT_T -#define HALF_DIGIT_T std::uint32_t -#endif - -class uinteger_t; - -namespace std { // This is probably not a good idea - // Give uinteger_t type traits - template <> struct is_arithmetic : std::true_type {}; - template <> struct is_integral : std::true_type {}; - template <> struct is_unsigned : std::true_type {}; -} - -class uinteger_t { -public: - using digit = DIGIT_T; - using half_digit = HALF_DIGIT_T; - - static constexpr std::size_t digit_octets = sizeof(digit); // number of octets per digit - static constexpr std::size_t digit_bits = digit_octets * 8; // number of bits per digit - static constexpr std::size_t half_digit_octets = sizeof(half_digit); // number of octets per half_digit - static constexpr std::size_t half_digit_bits = half_digit_octets * 8; // number of bits per half_digit - - using container = std::vector; - - template - struct is_result { - static const bool value = false; - }; - - template - struct is_result> { - static const bool value = true; - }; - - template - struct is_result> { - static const bool value = true; - }; - -private: - static_assert(digit_octets == half_digit_octets * 2, "half_digit must be exactly half the size of digit"); - - static constexpr std::size_t 
karatsuba_cutoff = 1024 / digit_bits; - static constexpr double growth_factor = 1.5; - - std::size_t _begin; - std::size_t _end; - container _value_instance; - container& _value; - bool _carry; - -public: - // Window to vector (uses _begin and _end) - - void reserve(std::size_t sz) { - _value.reserve(sz + _begin); - } - - std::size_t grow(std::size_t n) { - // expands the vector using a growth factor - // and returns the new capacity. - auto cc = _value.capacity(); - if (n >= cc) { - cc = n * growth_factor; - _value.reserve(cc); - } - return cc; - } - - void resize(std::size_t sz) { - grow(sz + _begin); - _value.resize(sz + _begin); - } - - void resize(std::size_t sz, const digit& c) { - grow(sz + _begin); - _value.resize(sz + _begin, c); - } - - void clear() { - _value.clear(); - _begin = 0; - _end = 0; - _carry = false; - } - - digit* data() noexcept { - return _value.data() + _begin; - } - - const digit* data() const noexcept { - return _value.data() + _begin; - } - - std::size_t size() const noexcept { - return _end ? 
_end - _begin : _value.size() - _begin; - } - - void prepend(std::size_t sz, const digit& c) { - // Efficiently prepend by growing backwards by growth factor - auto min = std::min(_begin, sz); - if (min) { - // If there is some space before `_begin`, we try using it first: - _begin -= min; - std::fill_n(_value.begin() + _begin, min, c); - sz -= min; - } - if (sz) { - ASSERT(_begin == 0); // _begin should be 0 in here - // If there's still more room needed, we grow the vector: - // Ex.: grow using prepend(3, y) - // sz = 3 - // _begin = 0 (B) - // _end = 1 (E) - // initially (capacity == 12): - // |xxxxxxxxxx- | - // B E - // after reclaiming space after `_end` (same capacity == 12): - // |xxxxxxxxxx | - // B - // _end = 0 - // csz = 10 - // grow returns the new capacity (22) - // isz = 12 (22 - 10) - // _begin = 9 (12 - 3) - // after (capacity == (12 + 3) * 1.5 == 22): - // |---------yyyxxxxxxxxxx| - // B - if (_end) { - // reclaim space after `_end` - _value.resize(_end); - _end = 0; - } - auto csz = _value.size(); - auto isz = grow(csz + sz) - csz; - _value.insert(_value.begin(), isz, c); - _begin = isz - sz; - } - } - - void prepend(const digit& c) { - prepend(1, c); - } - - void prepend(const uinteger_t& num) { - prepend(num.size(), 0); - std::copy(num.begin(), num.end(), begin()); - } - - void append(std::size_t sz, const digit& c) { - // Efficiently append by growing by growth factor - if (_end) { - // reclaim space after `_end` - _value.resize(_end); - _end = 0; - } - auto nsz = _value.size() + sz; - grow(nsz); - _value.resize(nsz, c); - } - - void append(const digit& c) { - append(1, c); - } - - void append(const uinteger_t& num) { - auto sz = num.size(); - append(sz, 0); - std::copy(num.begin(), num.end(), end() - sz); - } - - container::iterator begin() noexcept { - return _value.begin() + _begin; - } - - container::const_iterator begin() const noexcept { - return _value.cbegin() + _begin; - } - - container::iterator end() noexcept { - return _end ? 
_value.begin() + _end : _value.end(); - } - - container::const_iterator end() const noexcept { - return _end ? _value.cbegin() + _end : _value.cend(); - } - - container::reverse_iterator rbegin() noexcept { - return _end ? container::reverse_iterator(_value.begin() + _end) : _value.rbegin(); - } - - container::const_reverse_iterator rbegin() const noexcept { - return _end ? container::const_reverse_iterator(_value.cbegin() + _end) : _value.crbegin(); - } - - container::reverse_iterator rend() noexcept { - return container::reverse_iterator(_value.begin() + _begin); - } - - container::const_reverse_iterator rend() const noexcept { - return container::const_reverse_iterator(_value.cbegin() + _begin); - } - - container::reference front() { - return *begin(); - } - - container::const_reference front() const { - return *begin(); - } - - container::reference back() { - return *rbegin(); - } - - container::const_reference back() const { - return *rbegin(); - } - -private: - // Optimized primitives for operations - - static digit _bits(digit x) { - #if defined HAVE____BUILTIN_CLZLL - if (digit_octets == sizeof(unsigned long long)) { - return x ? digit_bits - __builtin_clzll(x) : 1; - } - #endif - #if defined HAVE____BUILTIN_CLZL - if (digit_octets == sizeof(unsigned long)) { - return x ? digit_bits - __builtin_clzl(x) : 1; - } - #endif - #if defined HAVE____BUILTIN_CLZ - if (digit_octets == sizeof(unsigned)) { - return x ? digit_bits - __builtin_clz(x) : 1; - } - #endif - { - digit c = x ? 
0 : 1; - while (x) { - x >>= 1; - ++c; - } - return c; - } - } - - static digit _mult(digit x, digit y, digit* lo) { - #if defined HAVE___UMUL128 - if (digit_bits == 64) { - digit h; - digit l = _umul128(x, y, &h); // _umul128(x, y, *hi) -> lo - return h; - } - #endif - #if defined HAVE___UMUL64 - if (digit_bits == 32) { - digit h; - digit l = _umul64(x, y, &h); // _umul64(x, y, *hi) -> lo - return h; - } - #endif - #if defined HAVE___UMUL32 - if (digit_bits == 16) { - digit h; - digit l = _umul32(x, y, &h); // _umul32(x, y, *hi) -> lo - return h; - } - #endif - #if defined HAVE____INT128_T - if (digit_bits == 64) { - auto r = static_cast<__uint128_t>(x) * static_cast<__uint128_t>(y); - *lo = r; - return r >> digit_bits; - } - #endif - if (digit_bits == 64) { - digit x0 = x & 0xffffffffUL; - digit x1 = x >> 32; - digit y0 = y & 0xffffffffUL; - digit y1 = y >> 32; - - digit u = (x0 * y0); - digit v = (x1 * y0) + (u >> 32); - digit w = (x0 * y1) + (v & 0xffffffffUL); - - *lo = (w << 32) + (u & 0xffffffffUL); // low - return (x1 * y1) + (v >> 32) + (w >> 32); // high - } if (digit_bits == 32) { - auto r = static_cast(x) * static_cast(y); - *lo = r; - return r >> 32; - } if (digit_bits == 16) { - auto r = static_cast(x) * static_cast(y); - *lo = r; - return r >> 16; - } if (digit_bits == 8) { - auto r = static_cast(x) * static_cast(y); - *lo = r; - return r >> 8; - } - } - - static digit _multadd(digit x, digit y, digit a, digit c, digit* lo) { - #if defined HAVE___UMUL128 && defined HAVE___ADDCARRY_U64 - if (digit_bits == 64) { - digit h; - digit l = _umul128(x, y, &h); // _umul128(x, y, *hi) -> lo - return h + _addcarry_u64(c, l, a, lo); // _addcarry_u64(carryin, x, y, *sum) -> carryout - } - #endif - #if defined HAVE___UMUL64 && defined HAVE___ADDCARRY_U32 - if (digit_bits == 32) { - digit h; - digit l = _umul64(x, y, &h); // _umul64(x, y, *hi) -> lo - return h + _addcarry_u32(c, l, a, lo); // _addcarry_u32(carryin, x, y, *sum) -> carryout - } - #endif - #if defined 
HAVE___UMUL32 && defined HAVE___ADDCARRY_U16 - if (digit_bits == 16) { - digit h; - digit l = _umul32(x, y, &h); // _umul32(x, y, *hi) -> lo - return h + _addcarry_u16(c, l, a, lo); // _addcarry_u16(carryin, x, y, *sum) -> carryout - } - #endif - #if defined HAVE____INT128_T - if (digit_bits == 64) { - auto r = static_cast<__uint128_t>(x) * static_cast<__uint128_t>(y) + static_cast<__uint128_t>(a) + static_cast<__uint128_t>(c); - *lo = r; - return r >> digit_bits; - } - #endif - if (digit_bits == 64) { - digit x0 = x & 0xffffffffUL; - digit x1 = x >> 32; - digit y0 = y & 0xffffffffUL; - digit y1 = y >> 32; - - digit u = (x0 * y0) + (a & 0xffffffffUL) + (c & 0xffffffffUL); - digit v = (x1 * y0) + (u >> 32) + (a >> 32) + (c >> 32); - digit w = (x0 * y1) + (v & 0xffffffffUL); - - *lo = (w << 32) + (u & 0xffffffffUL); // low - return (x1 * y1) + (v >> 32) + (w >> 32); // high - } - if (digit_bits == 32) { - auto r = static_cast(x) * static_cast(y) + static_cast(a) + static_cast(c); - *lo = r; - return r >> 32; - } - if (digit_bits == 16) { - auto r = static_cast(x) * static_cast(y) + static_cast(a) + static_cast(c); - *lo = r; - return r >> 16; - } - if (digit_bits == 8) { - auto r = static_cast(x) * static_cast(y) + static_cast(a) + static_cast(c); - *lo = r; - return r >> 8; - } - } - - static digit _divmod(digit x_hi, digit x_lo, digit y, digit* result) { - #if defined HAVE____INT128_T - if (digit_bits == 64) { - auto x = static_cast<__uint128_t>(x_hi) << digit_bits | static_cast<__uint128_t>(x_lo); - digit q = x / y; - digit r = x % y; - - *result = q; - return r; - } - #endif - if (digit_bits == 64) { - // quotient - digit q = x_lo << 1; - - // remainder - digit r = x_hi; - - digit carry = x_lo >> 63; - int i; - - for (i = 0; i < 64; i++) { - auto tmp = r >> 63; - r <<= 1; - r |= carry; - carry = tmp; - - if (carry == 0) { - if (r >= y) { - carry = 1; - } else { - tmp = q >> 63; - q <<= 1; - q |= carry; - carry = tmp; - continue; - } - } - - r -= y; - r -= (1 - 
carry); - carry = 1; - tmp = q >> 63; - q <<= 1; - q |= carry; - carry = tmp; - } - - *result = q; - return r; - } - if (digit_bits == 32) { - auto x = static_cast(x_hi) << 32 | static_cast(x_lo); - digit q = x / y; - digit r = x % y; - - *result = q; - return r; - } - if (digit_bits == 16) { - auto x = static_cast(x_hi) << 16 | static_cast(x_lo); - digit q = x / y; - digit r = x % y; - - *result = q; - return r; - } - if (digit_bits == 8) { - auto x = static_cast(x_hi) << 8 | static_cast(x_lo); - digit q = x / y; - digit r = x % y; - - *result = q; - return r; - } - } - - static digit _addcarry(digit x, digit y, digit c, digit* result) { - #if defined HAVE___ADDCARRY_U64 - if (digit_bits == 64) { - return _addcarry_u64(c, x, y, result); // _addcarry_u64(carryin, x, y, *sum) -> carryout - } - #endif - #if defined HAVE___ADDCARRY_U32 - if (digit_bits == 32) { - return _addcarry_u32(c, x, y, result); // _addcarry_u32(carryin, x, y, *sum) -> carryout - } - #endif - #if defined HAVE___ADDCARRY_U16 - if (digit_bits == 16) { - return _addcarry_u16(c, x, y, result); // _addcarry_u16(carryin, x, y, *sum) -> carryout - } - #endif - #if defined HAVE____BUILTIN_ADDCLL - if (digit_octets == sizeof(unsigned long long)) { - unsigned long long carryout; - *result = __builtin_addcll(x, y, c, &carryout); // __builtin_addcll(x, y, carryin, *carryout) -> sum - return carryout; - } - #endif - #if defined HAVE____BUILTIN_ADDCL - if (digit_octets == sizeof(unsigned long)) { - unsigned long carryout; - *result = __builtin_addcl(x, y, c, &carryout); // __builtin_addcl(x, y, carryin, *carryout) -> sum - return carryout; - } - #endif - #if defined HAVE____BUILTIN_ADDC - if (digit_octets == sizeof(unsigned)) { - unsigned carryout; - *result = __builtin_addc(x, y, c, &carryout); // __builtin_addc(x, y, carryin, *carryout) -> sum - return carryout; - } - #endif - #if defined HAVE____INT128_T - if (digit_bits == 64) { - auto r = static_cast<__uint128_t>(x) + static_cast<__uint128_t>(y) + 
static_cast<__uint128_t>(c); - *result = r; - return static_cast(r >> digit_bits); - } - #endif - if (digit_bits == 64) { - digit x0 = x & 0xffffffffUL; - digit x1 = x >> 32; - digit y0 = y & 0xffffffffUL; - digit y1 = y >> 32; - - auto u = x0 + y0 + c; - auto v = x1 + y1 + static_cast(u >> 32); - *result = (v << 32) + (u & 0xffffffffUL); - return static_cast(v >> 32); - } - if (digit_bits == 32) { - auto r = static_cast(x) + static_cast(y) + static_cast(c); - *result = r; - return static_cast(r >> 32); - } - if (digit_bits == 16) { - auto r = static_cast(x) + static_cast(y) + static_cast(c); - *result = r; - return static_cast(r >> 16); - } - if (digit_bits == 8) { - auto r = static_cast(x) + static_cast(y) + static_cast(c); - *result = r; - return static_cast(r >> 8); - } - } - - static digit _subborrow(digit x, digit y, digit c, digit* result) { - #if defined HAVE___SUBBORROW_U64 - if (digit_bits == 64) { - return _subborrow_u64(c, x, y, result); // _subborrow_u64(carryin, x, y, *sum) -> carryout - } - #endif - #if defined HAVE___SUBBORROW_U32 - if (digit_bits == 64) { - return _subborrow_u32(c, x, y, result); // _subborrow_u32(carryin, x, y, *sum) -> carryout - } - #endif - #if defined HAVE___SUBBORROW_U16 - if (digit_bits == 64) { - return _subborrow_u16(c, x, y, result); // _subborrow_u16(carryin, x, y, *sum) -> carryout - } - #endif - #if defined HAVE____BUILTIN_SUBCLL - if (digit_octets == sizeof(unsigned long long)) { - unsigned long long carryout; - *result = __builtin_subcll(x, y, c, &carryout); // __builtin_subcll(x, y, carryin, *carryout) -> sum - return carryout; - } - #endif - #if defined HAVE____BUILTIN_SUBCL - if (digit_octets == sizeof(unsigned long)) { - unsigned long carryout; - *result = __builtin_subcl(x, y, c, &carryout); // __builtin_subcl(x, y, carryin, *carryout) -> sum - return carryout; - } - #endif - #if defined HAVE____BUILTIN_SUBC - if (digit_octets == sizeof(unsigned)) { - unsigned carryout; - *result = __builtin_subc(x, y, c, 
&carryout); // __builtin_subc(x, y, carryin, *carryout) -> sum - return carryout; - } - #endif - #if defined HAVE____INT128_T - if (digit_bits == 64) { - auto r = static_cast<__uint128_t>(x) - static_cast<__uint128_t>(y) - static_cast<__uint128_t>(c); - *result = r; - return static_cast(r >> 64); - } - #endif - if (digit_bits == 64) { - digit x0 = x & 0xffffffffUL; - digit x1 = x >> 32; - digit y0 = y & 0xffffffffUL; - digit y1 = y >> 32; - - auto u = x0 - y0 - c; - auto v = x1 - y1 - static_cast(u >> 32); - *result = (v << 32) + (u & 0xffffffffUL); - return static_cast(v >> 32); - } - if (digit_bits == 32) { - auto r = static_cast(x) - static_cast(y) - static_cast(c); - *result = r; - return static_cast(r >> 32); - } - if (digit_bits == 16) { - auto r = static_cast(x) - static_cast(y) - static_cast(c); - *result = r; - return static_cast(r >> 16); - } - if (digit_bits == 8) { - auto r = static_cast(x) - static_cast(y) - static_cast(c); - *result = r; - return static_cast(r >> 8); - } - } - - // Helper functions - - void trim(digit mask = 0) { - auto rit = rbegin(); - auto rit_e = rend(); - - // Masks the last value of internal vector - mask &= (digit_bits - 1); - if (mask && rit != rit_e) { - *rit &= (static_cast(1) << mask) - 1; - } - - // Removes all unused zeros from the internal vector - auto rit_f = std::find_if(rit, rit_e, [](const digit& c) { return c; }); - resize(rit_e - rit_f); // shrink - } - - static constexpr char chr(int ord) { - constexpr const char _[256] = { - '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', - 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', - 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', - }; - return _[ord]; - } - - static constexpr int ord(int chr) { - constexpr const int _[256] = { - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - 0, 1, 2, 3, 
4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, - - -1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, - 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, -1, -1, -1, -1, -1, - -1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, - 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, -1, -1, -1, -1, -1, - - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - }; - return _[chr]; - } - -public: - static constexpr unsigned base_bits(int base) { - constexpr const unsigned _[256] = { - 0, 1, 0, 2, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 4, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, - - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, - - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, - }; - return _[base - 1]; - } - - static constexpr unsigned base_size(int base) { - constexpr const unsigned _[256] = { - 0, 64, 41, 32, 28, 25, 23, 22, 21, 20, 19, 18, 18, 17, 17, 16, - 16, 16, 16, 15, 15, 15, 15, 14, 14, 14, 
14, 14, 14, 14, 13, 13, - 13, 13, 13, 13, 13, 13, 13, 13, 12, 12, 12, 12, 12, 12, 12, 12, - 12, 12, 12, 12, 12, 12, 12, 12, 11, 11, 11, 11, 11, 11, 11, 11, - - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, - 11, 11, 11, 11, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, - - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 8, - }; - return _[base - 1]; - } - - static const uinteger_t uint_0() { - static uinteger_t uint_0(0); - return uint_0; - } - - static const uinteger_t uint_1() { - static uinteger_t uint_1(1); - return uint_1; - } - -private: - // Public Implementation -#ifdef UINT_T_PUBLIC_IMPLEMENTATION -public: -#endif - static uinteger_t& bitwise_and(uinteger_t& lhs, const uinteger_t& rhs) { - auto lhs_sz = lhs.size(); - auto rhs_sz = rhs.size(); - - if (lhs_sz > rhs_sz) { - lhs.resize(rhs_sz); // shrink - } - - auto lhs_it = lhs.begin(); - auto lhs_it_e = lhs.end(); - - auto rhs_it = rhs.begin(); - - for (; lhs_it != lhs_it_e; ++lhs_it, ++rhs_it) { - *lhs_it &= *rhs_it; - } - - // Finish up - lhs.trim(); - return lhs; - } - - static uinteger_t& bitwise_and(uinteger_t& result, const uinteger_t& lhs, const uinteger_t& rhs) { - auto lhs_sz = lhs.size(); - auto rhs_sz = rhs.size(); - - auto result_sz = std::max(lhs_sz, rhs_sz); - result.resize(result_sz); - - // not using `end()` because resize of `result.resize()` could have - // resized `lhs` or `rhs` if `result` is also either `rhs` or `lhs`. 
- auto lhs_it = lhs.begin(); - auto lhs_it_e = lhs_it + lhs_sz; - - auto rhs_it = rhs.begin(); - auto rhs_it_e = rhs_it + rhs_sz; - - auto it = result.begin(); - - if (lhs_sz < rhs_sz) { - for (; lhs_it != lhs_it_e; ++lhs_it, ++rhs_it, ++it) { - *it = *lhs_it & *rhs_it; - } - for (; rhs_it != rhs_it_e; ++rhs_it, ++it) { - *it = 0; - } - } else { - for (; rhs_it != rhs_it_e; ++lhs_it, ++rhs_it, ++it) { - *it = *lhs_it & *rhs_it; - } - for (; lhs_it != lhs_it_e; ++lhs_it, ++it) { - *it = 0; - } - } - - // Finish up - result.trim(); - return result; - } - - static uinteger_t bitwise_and(const uinteger_t& lhs, const uinteger_t& rhs) { - uinteger_t result; - bitwise_and(result, lhs, rhs); - return result; - } - - static uinteger_t& bitwise_or(uinteger_t& lhs, const uinteger_t& rhs) { - auto lhs_sz = lhs.size(); - auto rhs_sz = rhs.size(); - - if (lhs_sz < rhs_sz) { - lhs.resize(rhs_sz, 0); // grow - } - - auto lhs_it = lhs.begin(); - - auto rhs_it = rhs.begin(); - auto rhs_it_e = rhs.end(); - - for (; rhs_it != rhs_it_e; ++lhs_it, ++rhs_it) { - *lhs_it |= *rhs_it; - } - - // Finish up - lhs.trim(); - return lhs; - } - - static uinteger_t& bitwise_or(uinteger_t& result, const uinteger_t& lhs, const uinteger_t& rhs) { - auto lhs_sz = lhs.size(); - auto rhs_sz = rhs.size(); - - auto result_sz = std::max(lhs_sz, rhs_sz); - result.resize(result_sz); - - // not using `end()` because resize of `result.resize()` could have - // resized `lhs` or `rhs` if `result` is also either `rhs` or `lhs`. 
- auto lhs_it = lhs.begin(); - auto lhs_it_e = lhs_it + lhs_sz; - - auto rhs_it = rhs.begin(); - auto rhs_it_e = rhs_it + rhs_sz; - - auto it = result.begin(); - - if (lhs_sz < rhs_sz) { - for (; lhs_it != lhs_it_e; ++lhs_it, ++rhs_it, ++it) { - *it = *lhs_it | *rhs_it; - } - for (; rhs_it != rhs_it_e; ++rhs_it, ++it) { - *it = *rhs_it; - } - } else { - for (; rhs_it != rhs_it_e; ++lhs_it, ++rhs_it, ++it) { - *it = *lhs_it | *rhs_it; - } - for (; lhs_it != lhs_it_e; ++lhs_it, ++it) { - *it = *lhs_it; - } - } - - // Finish up - result.trim(); - return result; - } - static uinteger_t bitwise_or(const uinteger_t& lhs, const uinteger_t& rhs) { - uinteger_t result; - bitwise_or(result, lhs, rhs); - return result; - } - - static uinteger_t& bitwise_xor(uinteger_t& lhs, const uinteger_t& rhs) { - auto lhs_sz = lhs.size(); - auto rhs_sz = rhs.size(); - - if (lhs_sz < rhs_sz) { - lhs.resize(rhs_sz, 0); // grow - } - - auto lhs_it = lhs.begin(); - - auto rhs_it = rhs.begin(); - auto rhs_it_e = rhs.end(); - - for (; rhs_it != rhs_it_e; ++lhs_it, ++rhs_it) { - *lhs_it ^= *rhs_it; - } - - // Finish up - lhs.trim(); - return lhs; - } - - static uinteger_t& bitwise_xor(uinteger_t& result, const uinteger_t& lhs, const uinteger_t& rhs) { - auto lhs_sz = lhs.size(); - auto rhs_sz = rhs.size(); - - auto result_sz = std::max(lhs_sz, rhs_sz); - result.resize(result_sz); - - // not using `end()` because resize of `result.resize()` could have - // resized `lhs` or `rhs` if `result` is also either `rhs` or `lhs`. 
- auto lhs_it = lhs.begin(); - auto lhs_it_e = lhs_it + lhs_sz; - - auto rhs_it = rhs.begin(); - auto rhs_it_e = rhs_it + rhs_sz; - - auto it = result.begin(); - - if (lhs_sz < rhs_sz) { - for (; lhs_it != lhs_it_e; ++lhs_it, ++rhs_it, ++it) { - *it = *lhs_it ^ *rhs_it; - } - for (; rhs_it != rhs_it_e; ++rhs_it, ++it) { - *it = *rhs_it; - } - } else { - for (; rhs_it != rhs_it_e; ++lhs_it, ++rhs_it, ++it) { - *it = *lhs_it ^ *rhs_it; - } - for (; lhs_it != lhs_it_e; ++lhs_it, ++it) { - *it = *lhs_it; - } - } - - // Finish up - result.trim(); - return result; - } - - static uinteger_t bitwise_xor(const uinteger_t& lhs, const uinteger_t& rhs) { - uinteger_t result; - bitwise_xor(result, lhs, rhs); - return result; - } - - static uinteger_t& bitwise_inv(uinteger_t& lhs) { - auto lhs_sz = lhs.size(); - - auto b = lhs.bits(); - - if (!lhs_sz) { - lhs.append(0); - } - - // not using `end()` because resize of `result.resize()` could have - // resized `lhs` if `result` is also `lhs`. - auto lhs_it = lhs.begin(); - auto lhs_it_e = lhs_it + lhs_sz; - - for (; lhs_it != lhs_it_e; ++lhs_it) { - *lhs_it = ~*lhs_it; - } - - // Finish up - lhs.trim(b ? b : 1); - return lhs; - } - - static uinteger_t& bitwise_inv(uinteger_t& result, const uinteger_t& lhs) { - auto lhs_sz = lhs.size(); - - auto b = lhs.bits(); - - auto result_sz = lhs_sz ? lhs_sz : 1; - result.resize(result_sz); - - // not using `end()` because resize of `result.resize()` could have - // resized `lhs` if `result` is also `lhs`. - auto lhs_it = lhs.begin(); - auto lhs_it_e = lhs_it + lhs_sz; - - auto it = result.begin(); - auto it_e = it + result_sz; - - for (; lhs_it != lhs_it_e; ++lhs_it, ++it) { - *it = ~*lhs_it; - } - for (; it != it_e; ++it) { - *it = ~static_cast(0); - } - - // Finish up - result.trim(b ? 
b : 1); - return result; - } - - static uinteger_t bitwise_inv(const uinteger_t& lhs) { - uinteger_t result; - bitwise_inv(result, lhs); - return result; - } - - static uinteger_t& bitwise_lshift(uinteger_t& lhs, const uinteger_t& rhs) { - if (!rhs) { - return lhs; - } - - uinteger_t shifts_q; - uinteger_t shifts_r; - auto _digit_bits = digit_bits; - auto uint_digit_bits = uinteger_t(_digit_bits); - divmod(shifts_q, shifts_r, rhs, uint_digit_bits); - std::size_t shifts = static_cast(shifts_q); - std::size_t shift = static_cast(shifts_r); - - if (shifts) { - lhs.prepend(shifts, 0); - } - if (shift) { - digit shifted = 0; - auto lhs_it = lhs.begin() + shifts; - auto lhs_it_e = lhs.end(); - for (; lhs_it != lhs_it_e; ++lhs_it) { - auto v = (*lhs_it << shift) | shifted; - shifted = *lhs_it >> (_digit_bits - shift); - *lhs_it = v; - } - if (shifted) { - lhs.append(shifted); - } - } - - // Finish up - lhs.trim(); - return lhs; - } - - static uinteger_t& bitwise_lshift(uinteger_t& result, const uinteger_t& lhs, const uinteger_t& rhs) { - if (&result._value == &lhs._value) { - bitwise_lshift(result, rhs); - return result; - } - if (!rhs) { - result = lhs; - return result; - } - - auto lhs_sz = lhs.size(); - - uinteger_t shifts_q; - uinteger_t shifts_r; - auto _digit_bits = digit_bits; - auto uint_digit_bits = uinteger_t(_digit_bits); - divmod(shifts_q, shifts_r, rhs, uint_digit_bits); - std::size_t shifts = static_cast(shifts_q); - std::size_t shift = static_cast(shifts_r); - - auto result_sz = lhs_sz + shifts; - result.grow(result_sz + 1); - result.resize(shifts, 0); - result.resize(result_sz); - - // not using `end()` because resize of `result.resize()` could have - // resized `lhs` if `result` is also `lhs`. 
- auto lhs_it = lhs.begin(); - auto lhs_it_e = lhs_it + lhs_sz; - - auto it = result.begin() + shifts; - - if (shift) { - digit shifted = 0; - for (; lhs_it != lhs_it_e; ++lhs_it, ++it) { - auto v = (*lhs_it << shift) | shifted; - shifted = *lhs_it >> (_digit_bits - shift); - *it = v; - } - if (shifted) { - result.append(shifted); - } - } else { - for (; lhs_it != lhs_it_e; ++lhs_it, ++it) { - *it = *lhs_it; - } - } - - // Finish up - result.trim(); - return result; - } - - static uinteger_t bitwise_lshift(const uinteger_t& lhs, const uinteger_t& rhs) { - uinteger_t result; - bitwise_lshift(result, lhs, rhs); - return result; - } - - static uinteger_t& bitwise_rshift(uinteger_t& lhs, const uinteger_t& rhs) { - if (!rhs) { - return lhs; - } - - auto lhs_sz = lhs.size(); - - auto _digit_bits = digit_bits; - if (compare(rhs, uinteger_t(lhs_sz * _digit_bits)) >= 0) { - lhs = uint_0(); - return lhs; - } - - uinteger_t shifts_q; - uinteger_t shifts_r; - auto uint_digit_bits = uinteger_t(_digit_bits); - divmod(shifts_q, shifts_r, rhs, uint_digit_bits); - std::size_t shifts = static_cast(shifts_q); - std::size_t shift = static_cast(shifts_r); - - if (shifts) { - lhs._begin += shifts; - } - if (shift) { - digit shifted = 0; - auto lhs_rit = lhs.rbegin(); - auto lhs_rit_e = lhs.rend(); - for (; lhs_rit != lhs_rit_e; ++lhs_rit) { - auto v = (*lhs_rit >> shift) | shifted; - shifted = *lhs_rit << (_digit_bits - shift); - *lhs_rit = v; - } - lhs.trim(); - } - - return lhs; - } - - static uinteger_t& bitwise_rshift(uinteger_t& result, const uinteger_t& lhs, const uinteger_t& rhs) { - if (&result._value == &lhs._value) { - bitwise_lshift(result, rhs); - return result; - } - if (!rhs) { - result = lhs; - return result; - } - - auto lhs_sz = lhs.size(); - - auto _digit_bits = digit_bits; - if (compare(rhs, uinteger_t(lhs_sz * _digit_bits)) >= 0) { - result = uint_0(); - return result; - } - - uinteger_t shifts_q; - uinteger_t shifts_r; - auto uint_digit_bits = 
uinteger_t(_digit_bits); - divmod(shifts_q, shifts_r, rhs, uint_digit_bits); - std::size_t shifts = static_cast(shifts_q); - std::size_t shift = static_cast(shifts_r); - - auto result_sz = lhs_sz - shifts; - result.resize(result_sz); - - // not using `end()` because resize of `result.resize()` could have - // resized `lhs` if `result` is also `lhs`. - auto lhs_rit = lhs.rbegin(); - auto lhs_rit_e = lhs_rit + lhs_sz - shifts; - - auto rit = result.rbegin(); - auto rit_e = rit + result_sz; - - if (shift) { - digit shifted = 0; - for (; lhs_rit != lhs_rit_e; ++lhs_rit, ++rit) { - ASSERT(rit != rit_e); (void)(rit_e); - auto v = (*lhs_rit >> shift) | shifted; - shifted = *lhs_rit << (_digit_bits - shift); - *rit = v; - } - } else { - for (; lhs_rit != lhs_rit_e; ++lhs_rit, ++rit) { - ASSERT(rit != rit_e); (void)(rit_e); - *rit = *lhs_rit; - } - } - - // Finish up - result.trim(); - return result; - } - - static uinteger_t bitwise_rshift(const uinteger_t& lhs, const uinteger_t& rhs) { - uinteger_t result; - bitwise_rshift(result, lhs, rhs); - return result; - } - - static int compare(const uinteger_t& lhs, const uinteger_t& rhs) { - auto lhs_sz = lhs.size(); - auto rhs_sz = rhs.size(); - - if (lhs_sz > rhs_sz) return 1; - if (lhs_sz < rhs_sz) return -1; - - auto lhs_rit = lhs.rbegin(); - auto lhs_rit_e = lhs.rend(); - - auto rhs_rit = rhs.rbegin(); - - for (; lhs_rit != lhs_rit_e && *lhs_rit == *rhs_rit; ++lhs_rit, ++rhs_rit); - - if (lhs_rit != lhs_rit_e) { - if (*lhs_rit > *rhs_rit) return 1; - if (*lhs_rit < *rhs_rit) return -1; - } - - return 0; - } - - static uinteger_t& long_add(uinteger_t& lhs, const uinteger_t& rhs) { - auto lhs_sz = lhs.size(); - auto rhs_sz = rhs.size(); - - if (lhs_sz < rhs_sz) { - lhs.reserve(rhs_sz + 1); - lhs.resize(rhs_sz, 0); // grow - } - - // not using `end()` because resize of `lhs.resize()` could have - // resized `lhs`. 
- auto lhs_it = lhs.begin(); - auto lhs_it_e = lhs_it + lhs_sz; - - auto rhs_it = rhs.begin(); - auto rhs_it_e = rhs_it + rhs_sz; - - digit carry = 0; - if (lhs_sz < rhs_sz) { - for (; lhs_it != lhs_it_e; ++rhs_it, ++lhs_it) { - carry = _addcarry(*lhs_it, *rhs_it, carry, &*lhs_it); - } - for (; carry && rhs_it != rhs_it_e; ++rhs_it, ++lhs_it) { - carry = _addcarry(0, *rhs_it, carry, &*lhs_it); - } - for (; rhs_it != rhs_it_e; ++rhs_it, ++lhs_it) { - *lhs_it = *rhs_it; - } - } else { - for (; rhs_it != rhs_it_e; ++rhs_it, ++lhs_it) { - carry = _addcarry(*lhs_it, *rhs_it, carry, &*lhs_it); - } - for (; carry && lhs_it != lhs_it_e; ++lhs_it) { - carry = _addcarry(*lhs_it, 0, carry, &*lhs_it); - } - } - - if (carry) { - lhs.append(1); - } - - lhs._carry = false; - - // Finish up - lhs.trim(); - return lhs; - } - - static uinteger_t& long_add(uinteger_t& result, const uinteger_t& lhs, const uinteger_t& rhs) { - auto lhs_sz = lhs.size(); - auto rhs_sz = rhs.size(); - - auto result_sz = std::max(lhs_sz, rhs_sz); - result.reserve(result_sz + 1); - result.resize(result_sz, 0); - - // not using `end()` because resize of `result.resize()` could have - // resized `lhs` or `rhs` if `result` is also either `rhs` or `lhs`. 
- auto lhs_it = lhs.begin(); - auto lhs_it_e = lhs_it + lhs_sz; - - auto rhs_it = rhs.begin(); - auto rhs_it_e = rhs_it + rhs_sz; - - auto it = result.begin(); - - digit carry = 0; - if (lhs_sz < rhs_sz) { - for (; lhs_it != lhs_it_e; ++lhs_it, ++rhs_it, ++it) { - carry = _addcarry(*lhs_it, *rhs_it, carry, &*it); - } - for (; carry && rhs_it != rhs_it_e; ++rhs_it, ++it) { - carry = _addcarry(0, *rhs_it, carry, &*it); - } - for (; rhs_it != rhs_it_e; ++rhs_it, ++it) { - *it = *rhs_it; - } - } else { - for (; rhs_it != rhs_it_e; ++lhs_it, ++rhs_it, ++it) { - carry = _addcarry(*lhs_it, *rhs_it, carry, &*it); - } - for (; carry && lhs_it != lhs_it_e; ++lhs_it, ++it) { - carry = _addcarry(*lhs_it, 0, carry, &*it); - } - for (; lhs_it != lhs_it_e; ++lhs_it, ++it) { - *it = *lhs_it; - } - } - - if (carry) { - result.append(1); - } - result._carry = false; - - // Finish up - result.trim(); - return result; - } - - static uinteger_t& add(uinteger_t& lhs, const uinteger_t& rhs) { - // First try saving some calculations: - if (!rhs) { - return lhs; - } - if (!lhs) { - lhs = rhs; - return lhs; - } - - return long_add(lhs, rhs); - } - - static uinteger_t& add(uinteger_t& result, const uinteger_t& lhs, const uinteger_t& rhs) { - // First try saving some calculations: - if (!rhs) { - result = lhs; - return result; - } - if (!lhs) { - result = rhs; - return result; - } - - return long_add(result, lhs, rhs); - } - - static uinteger_t add(const uinteger_t& lhs, const uinteger_t& rhs) { - uinteger_t result; - add(result, lhs, rhs); - return result; - } - - static uinteger_t& long_sub(uinteger_t& lhs, const uinteger_t& rhs) { - auto lhs_sz = lhs.size(); - auto rhs_sz = rhs.size(); - - if (lhs_sz < rhs_sz) { - lhs.resize(rhs_sz, 0); // grow - } - - // not using `end()` because resize of `lhs.resize()` could have - // resized `lhs`. 
- auto lhs_it = lhs.begin(); - auto lhs_it_e = lhs_it + lhs_sz; - - auto rhs_it = rhs.begin(); - auto rhs_it_e = rhs_it + rhs_sz; - - digit borrow = 0; - if (lhs_sz < rhs_sz) { - for (; lhs_it != lhs_it_e; ++lhs_it, ++rhs_it) { - borrow = _subborrow(*lhs_it, *rhs_it, borrow, &*lhs_it); - } - for (; rhs_it != rhs_it_e; ++lhs_it, ++rhs_it) { - borrow = _subborrow(0, *rhs_it, borrow, &*lhs_it); - } - } else { - for (; rhs_it != rhs_it_e; ++lhs_it, ++rhs_it) { - borrow = _subborrow(*lhs_it, *rhs_it, borrow, &*lhs_it); - } - for (; borrow && lhs_it != lhs_it_e; ++lhs_it) { - borrow = _subborrow(*lhs_it, 0, borrow, &*lhs_it); - } - } - - lhs._carry = borrow; - - // Finish up - lhs.trim(); - return lhs; - } - - static uinteger_t& long_sub(uinteger_t& result, const uinteger_t& lhs, const uinteger_t& rhs) { - auto lhs_sz = lhs.size(); - auto rhs_sz = rhs.size(); - - auto result_sz = std::max(lhs_sz, rhs_sz); - result.resize(result_sz, 0); - - // not using `end()` because resize of `result.resize()` could have - // resized `lhs` or `rhs` if `result` is also either `rhs` or `lhs`. 
- auto lhs_it = lhs.begin(); - auto lhs_it_e = lhs_it + lhs_sz; - - auto rhs_it = rhs.begin(); - auto rhs_it_e = rhs_it + rhs_sz; - - auto it = result.begin(); - - digit borrow = 0; - if (lhs_sz < rhs_sz) { - for (; lhs_it != lhs_it_e; ++lhs_it, ++rhs_it, ++it) { - borrow = _subborrow(*lhs_it, *rhs_it, borrow, &*it); - } - for (; rhs_it != rhs_it_e; ++rhs_it, ++it) { - borrow = _subborrow(0, *rhs_it, borrow, &*it); - } - } else { - for (; rhs_it != rhs_it_e; ++lhs_it, ++rhs_it, ++it) { - borrow = _subborrow(*lhs_it, *rhs_it, borrow, &*it); - } - for (; borrow && lhs_it != lhs_it_e; ++lhs_it, ++it) { - borrow = _subborrow(*lhs_it, 0, borrow, &*it); - } - for (; lhs_it != lhs_it_e; ++lhs_it, ++it) { - *it = *lhs_it; - } - } - - result._carry = borrow; - - // Finish up - result.trim(); - return result; - } - - static uinteger_t& sub(uinteger_t& lhs, const uinteger_t& rhs) { - // First try saving some calculations: - if (!rhs) { - return lhs; - } - - return long_sub(lhs, rhs); - } - - static uinteger_t& sub(uinteger_t& result, const uinteger_t& lhs, const uinteger_t& rhs) { - // First try saving some calculations: - if (!rhs) { - result = lhs; - return result; - } - - return long_sub(result, lhs, rhs); - } - - static uinteger_t sub(const uinteger_t& lhs, const uinteger_t& rhs) { - uinteger_t result; - sub(result, lhs, rhs); - return result; - } - - // Single word long multiplication - // Fastests, but ONLY for single sized rhs - static uinteger_t& single_mult(uinteger_t& result, const uinteger_t& lhs, const uinteger_t& rhs) { - auto lhs_sz = lhs.size(); - auto rhs_sz = rhs.size(); - - ASSERT(rhs_sz == 1); (void)(rhs_sz); - auto n = rhs.front(); - - uinteger_t tmp; - tmp.resize(lhs_sz + 1, 0); - - auto it_lhs = lhs.begin(); - auto it_lhs_e = lhs.end(); - - auto it_result = tmp.begin(); - - digit carry = 0; - for (; it_lhs != it_lhs_e; ++it_lhs, ++it_result) { - carry = _multadd(*it_lhs, n, 0, carry, &*it_result); - } - if (carry) { - *it_result = carry; - } - - result = 
std::move(tmp); - - // Finish up - result.trim(); - return result; - } - - static uinteger_t& long_mult(uinteger_t& result, const uinteger_t& lhs, const uinteger_t& rhs) { - auto lhs_sz = lhs.size(); - auto rhs_sz = rhs.size(); - - if (lhs_sz > rhs_sz) { - // rhs should be the largest: - return long_mult(result, rhs, lhs); - } - - if (lhs_sz == 1) { - return single_mult(result, rhs, lhs); - } - - uinteger_t tmp; - tmp.resize(lhs_sz + rhs_sz, 0); - - auto it_lhs = lhs.begin(); - auto it_lhs_e = lhs.end(); - - auto it_rhs = rhs.begin(); - auto it_rhs_e = rhs.end(); - - auto it_result = tmp.begin(); - auto it_result_s = it_result; - auto it_result_l = it_result; - - for (; it_lhs != it_lhs_e; ++it_lhs, ++it_result) { - if (auto lhs_it_val = *it_lhs) { - auto _it_rhs = it_rhs; - auto _it_result = it_result; - digit carry = 0; - for (; _it_rhs != it_rhs_e; ++_it_rhs, ++_it_result) { - carry = _multadd(*_it_rhs, lhs_it_val, *_it_result, carry, &*_it_result); - } - if (carry) { - *_it_result++ = carry; - } - if (it_result_l < _it_result) { - it_result_l = _it_result; - } - } - } - - tmp.resize(it_result_l - it_result_s); // shrink - - result = std::move(tmp); - - // Finish up - result.trim(); - return result; - } - - // A helper for Karatsuba multiplication to split a number in two, at n. - static std::pair karatsuba_mult_split(const uinteger_t& num, std::size_t n) { - const uinteger_t a(num, num._begin, num._begin + n); - const uinteger_t b(num, num._begin + n, num._end); - return std::make_pair(std::move(a), std::move(b)); - } - - // If rhs has at least twice the digits of lhs, and lhs is big enough that - // Karatsuba would pay off *if* the inputs had balanced sizes. - // View rhs as a sequence of slices, each with lhs.size() digits, - // and multiply the slices by lhs, one at a time. 
- static uinteger_t& karatsuba_lopsided_mult(uinteger_t& result, const uinteger_t& lhs, const uinteger_t& rhs, std::size_t cutoff) { - auto lhs_sz = lhs.size(); - auto rhs_sz = rhs.size(); - - ASSERT(lhs_sz > cutoff); - ASSERT(2 * lhs_sz <= rhs_sz); - - auto rhs_begin = rhs._begin; - std::size_t shift = 0; - - uinteger_t r; - while (rhs_sz > 0) { - // Multiply the next slice of rhs by lhs and add into result: - auto slice_size = std::min(lhs_sz, rhs_sz); - const uinteger_t rhs_slice(rhs, rhs_begin, rhs_begin + slice_size); - uinteger_t p; - karatsuba_mult(p, lhs, rhs_slice, cutoff); - uinteger_t rs(r, shift, 0); - add(rs, rs, p); - shift += slice_size; - rhs_sz -= slice_size; - rhs_begin += slice_size; - } - - result = std::move(r); - return result; - } - - // Karatsuba multiplication - static uinteger_t& karatsuba_mult(uinteger_t& result, const uinteger_t& lhs, const uinteger_t& rhs, std::size_t cutoff = 1) { - auto lhs_sz = lhs.size(); - auto rhs_sz = rhs.size(); - - if (lhs_sz > rhs_sz) { - // rhs should be the largest: - return karatsuba_mult(result, rhs, lhs, cutoff); - } - - if (lhs_sz <= cutoff) { - return long_mult(result, lhs, rhs); - } - - // If a is too small compared to b, splitting on b gives a degenerate case - // in which Karatsuba may be (even much) less efficient than long multiplication. - if (2 * lhs_sz <= rhs_sz) { - return karatsuba_lopsided_mult(result, lhs, rhs, cutoff); - } - - // Karatsuba: - // - // A B - // x C D - // --------------------- - // AD BD - // AC BC - // --------------------- - // AC AD + BC BD - // - // AD + BC = - // AC + AD + BC + BD - AC - BD - // (A + B) (C + D) - AC - BD - - // Calculate the split point near the middle of the largest (rhs). 
- auto shift = rhs_sz >> 1; - - // Split to get A and B: - const auto lhs_pair = karatsuba_mult_split(lhs, shift); - const auto& A = lhs_pair.second; // hi - const auto& B = lhs_pair.first; // lo - - // Split to get C and D: - const auto rhs_pair = karatsuba_mult_split(rhs, shift); - const auto& C = rhs_pair.second; // hi - const auto& D = rhs_pair.first; // lo - - // Get the pieces: - uinteger_t AC; - karatsuba_mult(AC, A, C, cutoff); - - uinteger_t BD; - karatsuba_mult(BD, B, D, cutoff); - uinteger_t AD_BC, AB, CD; - karatsuba_mult(AD_BC, A + B, C + D, cutoff); - AD_BC -= AC; - AD_BC -= BD; - - // Join the pieces, AC and BD (can't overlap) into BD: - BD.reserve(shift * 2 + AC.size()); - BD.resize(shift * 2, 0); - BD.append(AC); - - // And add AD_BC to the middle: (AC BD) + ( AD + BC ): - uinteger_t BDs(BD, shift, 0); - add(BDs, BDs, AD_BC); - - result = std::move(BD); - - // Finish up - result.trim(); - return result; - } - - static uinteger_t& mult(uinteger_t& lhs, const uinteger_t& rhs) { - // Hard to see how this could have a further optimized implementation. 
- return mult(lhs, lhs, rhs); - } - - static uinteger_t& mult(uinteger_t& result, const uinteger_t& lhs, const uinteger_t& rhs) { - // First try saving some calculations: - if (!lhs || !rhs) { - result = uint_0(); - return result; - } - if (compare(lhs, uint_1()) == 0) { - result = rhs; - return result; - } - if (compare(rhs, uint_1()) == 0) { - result = lhs; - return result; - } - - return karatsuba_mult(result, lhs, rhs, karatsuba_cutoff); - } - - static uinteger_t mult(const uinteger_t& lhs, const uinteger_t& rhs) { - uinteger_t result; - mult(result, lhs, rhs); - return result; - } - - // Single word long division - // Fastests, but ONLY for single sized rhs - static std::pair, std::reference_wrapper> single_divmod(uinteger_t& quotient, uinteger_t& remainder, const uinteger_t& lhs, const uinteger_t& rhs) { - auto lhs_sz = lhs.size(); - auto rhs_sz = rhs.size(); - - ASSERT(rhs_sz == 1); (void)(rhs_sz); - auto n = rhs.front(); - - auto rit_lhs = lhs.rbegin(); - auto rit_lhs_e = lhs.rend(); - - auto q = uint_0(); - q.resize(lhs_sz, 0); - auto rit_q = q.rbegin(); - - digit r = 0; - for (; rit_lhs != rit_lhs_e; ++rit_lhs, ++rit_q) { - r = _divmod(r, *rit_lhs, n, &*rit_q); - } - - q.trim(); - - quotient = std::move(q); - remainder = r; - return std::make_pair(std::ref(quotient), std::ref(remainder)); - } - - // Implementation of Knuth's Algorithm D - static std::pair, std::reference_wrapper> knuth_divmod(uinteger_t& quotient, uinteger_t& remainder, const uinteger_t& lhs, const uinteger_t& rhs) { - uinteger_t v(lhs); - uinteger_t w(rhs); - - auto v_size = v.size(); - auto w_size = w.size(); - ASSERT(v_size >= w_size && w_size >= 2); - - // D1. normalize: shift rhs left so that its top digit is >= 63 bits. - // shift lhs left by the same amount. Results go into w and v. 
- auto d = uinteger_t(digit_bits - _bits(w.back())); - v <<= d; - w <<= d; - - if (*v.rbegin() >= *w.rbegin()) { - v.append(0); - } - v_size = v.size(); - v.append(0); - - // Now *v.rbegin() < *w.rbegin() so quotient has at most - // (and usually exactly) k = v.size() - w.size() digits. - auto k = v_size - w_size; - auto q = uint_0(); - q.resize(k + 1, 0); - - auto rit_q = q.rend() - (k + 1); - - auto it_v_b = v.begin(); - auto it_v_k = it_v_b + k; - - auto it_w = w.begin(); - auto it_w_e = w.end(); - - auto rit_w = w.rbegin(); - auto wm1 = *rit_w++; - auto wm2 = *rit_w; - - // D2. inner loop: divide v[k+0..k+n] by w[0..n] - for (; it_v_k >= it_v_b; --it_v_k, ++rit_q) { - // D3. Compute estimate quotient digit q; may overestimate by 1 (rare) - digit _q; - auto _r = _divmod(*(it_v_k + w_size), *(it_v_k + w_size - 1), wm1, &_q); - digit mullo = 0; - auto mulhi = _mult(_q, wm2, &mullo); - auto rlo = *(it_v_k + w_size - 2); - while (mulhi > _r || (mulhi == _r && mullo > rlo)) { - --_q; - if (_addcarry(_r, wm1, 0, &_r)) { - break; - } - mulhi = _mult(_q, wm2, &mullo); - } - - // D4. Multiply and subtract _q * w0[0:size_w] from vk[0:size_w+1] - auto _it_v = it_v_k; - auto _it_w = it_w; - mulhi = 0; - digit carry = 0; - for (; _it_w != it_w_e; ++_it_v, ++_it_w) { - mullo = 0; - mulhi = _multadd(*_it_w, _q, 0, mulhi, &mullo); - carry = _subborrow(*_it_v, mullo, carry, &*_it_v); - } - carry = _subborrow(*_it_v, 0, carry, &*_it_v); - - if (carry) { - // D6. Add w back if q was too large (this branch taken rarely) - --_q; - - _it_v = it_v_k; - _it_w = it_w; - carry = 0; - for (; _it_w != it_w_e; ++_it_v, ++_it_w) { - carry = _addcarry(*_it_v, *_it_w, carry, &*_it_v); - } - carry = _addcarry(*_it_v, 0, carry, &*_it_v); - } - - /* store quotient digit */ - *rit_q = _q; - } - - // D8. unnormalize: unshift remainder. 
- v.resize(w_size); - v >>= d; - - q.trim(); - v.trim(); - - quotient = std::move(q); - remainder = std::move(v); - return std::make_pair(std::ref(quotient), std::ref(remainder)); - } - - static std::pair, std::reference_wrapper> divmod(uinteger_t& quotient, uinteger_t& remainder, const uinteger_t& lhs, const uinteger_t& rhs) { - // First try saving some calculations: - if (!rhs) { - throw std::domain_error("Error: division or modulus by 0"); - } - auto lhs_sz = lhs.size(); - auto rhs_sz = rhs.size(); - if (lhs_sz == 1 && rhs_sz == 1) { - // Fast division and modulo for single value - auto a = *lhs.begin(); - auto b = *rhs.begin(); - quotient = a / b; - remainder = a % b; - return std::make_pair(std::ref(quotient), std::ref(remainder)); - } - if (compare(rhs, uint_1()) == 0) { - quotient = lhs; - remainder = uint_0(); - return std::make_pair(std::ref(quotient), std::ref(remainder)); - } - auto compared = compare(lhs, rhs); - if (compared == 0) { - quotient = uint_1(); - remainder = uint_0(); - return std::make_pair(std::ref(quotient), std::ref(remainder)); - } - if (!lhs || compared < 0) { - quotient = uint_0(); - remainder = lhs; - return std::make_pair(std::ref(quotient), std::ref(remainder)); - } - if (rhs_sz == 1) { - return single_divmod(quotient, remainder, lhs, rhs); - } - - return knuth_divmod(quotient, remainder, lhs, rhs); - } - - static std::pair divmod(const uinteger_t& lhs, const uinteger_t& rhs) { - uinteger_t quotient; - uinteger_t remainder; - divmod(quotient, remainder, lhs, rhs); - return std::make_pair(std::move(quotient), std::move(remainder)); - } - -private: - // Constructors - - template ::value and not std::is_same>::value>> - void _uint_t(const T& value) { - append(static_cast(value)); - } - - template ::value and not std::is_same>::value>> - void _uint_t(const T& value, Args... 
args) { - _uint_t(args...); - append(static_cast(value)); - } - - // This constructor creates a window view of the _value - uinteger_t(const uinteger_t& o, std::size_t begin, std::size_t end) : - _begin(begin), - _end(end), - _value(o._value), - _carry(o._carry) { } - -public: - uinteger_t() : - _begin(0), - _end(0), - _value(_value_instance), - _carry(false) { } - - uinteger_t(const uinteger_t& o) : - _begin(0), - _end(0), - _value_instance(o.begin(), o.end()), - _value(_value_instance), - _carry(o._carry) { } - - uinteger_t(uinteger_t&& o) : - _begin(std::move(o._begin)), - _end(std::move(o._end)), - _value_instance(std::move(o._value_instance)), - _value(_value_instance), - _carry(std::move(o._carry)) { } - - template ::value and not std::is_same>::value>> - uinteger_t(const T& value) : - _begin(0), - _end(0), - _value(_value_instance), - _carry(false) { - if (value) { - append(static_cast(value)); - } - } - - template ::value and not std::is_same>::value>> - uinteger_t(const T& value, Args... 
args) : - _begin(0), - _end(0), - _value(_value_instance), - _carry(false) { - _uint_t(args...); - append(static_cast(value)); - trim(); - } - - template ::value and not std::is_same>::value>> - uinteger_t(std::initializer_list list) : - _begin(0), - _end(0), - _value(_value_instance), - _carry(false) { - reserve(list.size()); - for (const auto& value : list) { - append(static_cast(value)); - } - trim(); - } - - template - explicit uinteger_t(T (&s)[N], int base=10) : - uinteger_t(s, N - 1, base) { } - - explicit uinteger_t(const unsigned char* bytes, std::size_t sz, int base) : - uinteger_t(strtouint(bytes, sz, base)) { } - - explicit uinteger_t(const char* bytes, std::size_t sz, int base) : - uinteger_t(strtouint(bytes, sz, base)) { } - - template - explicit uinteger_t(const std::vector& bytes, int base=10) : - uinteger_t(bytes.data(), bytes.size(), base) { } - - explicit uinteger_t(const std::string& bytes, int base=10) : - uinteger_t(bytes.data(), bytes.size(), base) { } - - // Assignment Operator - uinteger_t& operator=(const uinteger_t& o) { - _begin = 0; - _end = 0; - _value = container(o.begin(), o.end()); - _carry = o._carry; - return *this; - } - uinteger_t& operator=(uinteger_t&& o) { - _begin = std::move(o._begin); - _end = std::move(o._end); - _value_instance = std::move(o._value_instance); - _carry = std::move(o._carry); - return *this; - } - - // Typecast Operators - explicit operator bool() const { - return static_cast(size()); - } - explicit operator unsigned char() const { - return static_cast(size() ? front() : 0); - } - explicit operator unsigned short() const { - return static_cast(size() ? front() : 0); - } - explicit operator unsigned int() const { - return static_cast(size() ? front() : 0); - } - explicit operator unsigned long() const { - return static_cast(size() ? front() : 0); - } - explicit operator unsigned long long() const { - return static_cast(size() ? 
front() : 0); - } - explicit operator char() const { - return static_cast(size() ? front() : 0); - } - explicit operator short() const { - return static_cast(size() ? front() : 0); - } - explicit operator int() const { - return static_cast(size() ? front() : 0); - } - explicit operator long() const { - return static_cast(size() ? front() : 0); - } - explicit operator long long() const { - return static_cast(size() ? front() : 0); - } - - // Bitwise Operators - uinteger_t operator&(const uinteger_t& rhs) const { - return bitwise_and(*this, rhs); - } - - uinteger_t& operator&=(const uinteger_t& rhs) { - return bitwise_and(*this, rhs); - } - - uinteger_t operator|(const uinteger_t& rhs) const { - return bitwise_or(*this, rhs); - } - - uinteger_t& operator|=(const uinteger_t& rhs) { - return bitwise_or(*this, rhs); - } - - uinteger_t operator^(const uinteger_t& rhs) const { - return bitwise_xor(*this, rhs); - } - - uinteger_t& operator^=(const uinteger_t& rhs) { - return bitwise_xor(*this, rhs); - } - - uinteger_t operator~() const { - return bitwise_inv(*this); - } - - uinteger_t inv() { - return bitwise_inv(*this); - } - - // Bit Shift Operators - uinteger_t operator<<(const uinteger_t& rhs) const { - return bitwise_lshift(*this, rhs); - } - - uinteger_t& operator<<=(const uinteger_t& rhs) { - return bitwise_lshift(*this, rhs); - } - - uinteger_t operator>>(const uinteger_t& rhs) const { - return bitwise_rshift(*this, rhs); - } - - uinteger_t& operator>>=(const uinteger_t& rhs) { - return bitwise_rshift(*this, rhs); - } - - // Logical Operators - bool operator!() const { - return !static_cast(*this); - } - - bool operator&&(const uinteger_t& rhs) const { - return static_cast(*this) && rhs; - } - - bool operator||(const uinteger_t& rhs) const { - return static_cast(*this) || rhs; - } - - // Comparison Operators - bool operator==(const uinteger_t& rhs) const { - return compare(*this, rhs) == 0; - } - - bool operator!=(const uinteger_t& rhs) const { - return 
compare(*this, rhs) != 0; - } - - bool operator>(const uinteger_t& rhs) const { - return compare(*this, rhs) > 0; - } - - bool operator<(const uinteger_t& rhs) const { - return compare(*this, rhs) < 0; - } - - bool operator>=(const uinteger_t& rhs) const { - return compare(*this, rhs) >= 0; - } - - bool operator<=(const uinteger_t& rhs) const { - return compare(*this, rhs) <= 0; - } - - // Arithmetic Operators - uinteger_t operator+(const uinteger_t& rhs) const { - return add(*this, rhs); - } - - uinteger_t& operator+=(const uinteger_t& rhs) { - return add(*this, rhs); - } - - uinteger_t operator-(const uinteger_t& rhs) const { - return sub(*this, rhs); - } - - uinteger_t& operator-=(const uinteger_t& rhs) { - return sub(*this, rhs); - } - - uinteger_t operator*(const uinteger_t& rhs) const { - return mult(*this, rhs); - } - - uinteger_t& operator*=(const uinteger_t& rhs) { - return mult(*this, rhs); - } - - std::pair divmod(const uinteger_t& rhs) const { - return divmod(*this, rhs); - } - - uinteger_t operator/(const uinteger_t& rhs) const { - return divmod(*this, rhs).first; - } - - uinteger_t& operator/=(const uinteger_t& rhs) { - uinteger_t quotient; - uinteger_t remainder; - divmod(quotient, remainder, *this, rhs); - *this = std::move(quotient); - return *this; - } - - uinteger_t operator%(const uinteger_t& rhs) const { - return divmod(*this, rhs).second; - } - - uinteger_t& operator%=(const uinteger_t& rhs) { - uinteger_t quotient; - uinteger_t remainder; - divmod(quotient, remainder, *this, rhs); - *this = std::move(remainder); - return *this; - } - - // Increment Operator - uinteger_t& operator++() { - return *this += uint_1(); - } - uinteger_t operator++(int) { - uinteger_t temp(*this); - ++*this; - return temp; - } - - // Decrement Operator - uinteger_t& operator--() { - return *this -= uint_1(); - } - uinteger_t operator--(int) { - uinteger_t temp(*this); - --*this; - return temp; - } - - // Nothing done since promotion doesn't work here - uinteger_t 
operator+() const { - return *this; - } - - // two's complement - uinteger_t operator-() const { - return uint_0() - *this; - } - - // Get private value at index - const digit& value(std::size_t idx) const { - static const digit zero = 0; - return idx < size() ? *(begin() + idx) : zero; - } - - // Get value of bit N - bool operator[](std::size_t n) const { - auto nd = n / digit_bits; - auto nm = n % digit_bits; - return nd < size() ? (*(begin() + nd) >> nm) & 1 : 0; - } - - // Get bitsize of value - std::size_t bits() const { - auto sz = size(); - if (sz) { - return _bits(back()) + (sz - 1) * digit_bits; - } - return 0; - } - - // Get string representation of value - template ::value>> - Result str(int alphabet_base = 10) const { - auto num_sz = size(); - if (alphabet_base >= 2 && alphabet_base <= 36) { - Result result; - if (num_sz) { - auto alphabet_base_bits = base_bits(alphabet_base); - result.reserve(num_sz * base_size(alphabet_base)); - if (alphabet_base_bits) { - digit alphabet_base_mask = alphabet_base - 1; - std::size_t shift = 0; - auto ptr = reinterpret_cast(data()); - digit v = *ptr++; - v <<= half_digit_bits; - for (auto i = num_sz * 2 - 1; i; --i) { - v >>= half_digit_bits; - v |= (static_cast(*ptr++) << half_digit_bits); - do { - auto d = static_cast((v >> shift) & alphabet_base_mask); - result.push_back(chr(d)); - shift += alphabet_base_bits; - } while (shift <= half_digit_bits); - shift -= half_digit_bits; - } - v >>= (shift + half_digit_bits); - while (v) { - auto d = static_cast(v & alphabet_base_mask); - result.push_back(chr(d)); - v >>= alphabet_base_bits; - } - auto s = chr(0); - auto rit_f = std::find_if(result.rbegin(), result.rend(), [s](const char& c) { return c != s; }); - result.resize(result.rend() - rit_f); // shrink - } else { - uinteger_t uint_base = alphabet_base; - uinteger_t quotient = *this; - do { - auto r = quotient.divmod(uint_base); - auto d = static_cast(r.second); - result.push_back(chr(d)); - quotient = std::move(r.first); 
- } while (quotient); - } - std::reverse(result.begin(), result.end()); - } else { - result.push_back(chr(0)); - } - return result; - } else if (alphabet_base == 256) { - if (num_sz) { - auto ptr = reinterpret_cast(data()); - Result result(ptr, ptr + num_sz * digit_octets); - auto rit_f = std::find_if(result.rbegin(), result.rend(), [](const char& c) { return c; }); - result.resize(result.rend() - rit_f); // shrink - std::reverse(result.begin(), result.end()); - return result; - } else { - Result result; - result.push_back('\x00'); - return result; - } - } else { - throw std::invalid_argument("Base must be in the range [2, 36]"); - } - } - - static uinteger_t strtouint(const void* encoded, std::size_t encoded_size, int alphabet_base) { - const char* data = (const char *)encoded; - uinteger_t result; - - if (alphabet_base >= 2 && alphabet_base <= 36) { - uinteger_t alphabet_base_bits = base_bits(alphabet_base); - uinteger_t uint_base = alphabet_base; - if (alphabet_base_bits) { - for (; encoded_size; --encoded_size, ++data) { - auto d = ord(static_cast(*data)); - if (d < 0) { - throw std::invalid_argument("Error: Not a digit in base " + std::to_string(alphabet_base) + ": '" + std::string(1, *data) + "' at " + std::to_string(encoded_size)); - } - result = (result << alphabet_base_bits) | d; - } - } else { - for (; encoded_size; --encoded_size, ++data) { - auto d = ord(static_cast(*data)); - if (d < 0) { - throw std::invalid_argument("Error: Not a digit in base " + std::to_string(alphabet_base) + ": '" + std::string(1, *data) + "' at " + std::to_string(encoded_size)); - } - result = (result * uint_base) + d; - } - } - } else if (encoded_size && alphabet_base == 256) { - auto value_size = encoded_size / digit_octets; - auto value_padding = encoded_size % digit_octets; - if (value_padding) { - value_padding = digit_octets - value_padding; - ++value_size; - } - result.resize(value_size); // grow (no initialization) - *result.begin() = 0; // initialize value - auto ptr = 
reinterpret_cast(result.data()); - std::copy(data, data + encoded_size, ptr + value_padding); - std::reverse(ptr, ptr + value_size * digit_octets); - } else { - throw std::invalid_argument("Error: Cannot convert from base " + std::to_string(alphabet_base)); - } - - return result; - } - - template ::value>> - Result bin() const { - return str(2); - } - - template ::value>> - Result oct() const { - return str(8); - } - - template ::value>> - Result hex() const { - return str(16); - } - - template ::value>> - Result raw() const { - return str(256); - } -}; - -namespace std { // This is probably not a good idea - // Make it work with std::string() - inline std::string to_string(uinteger_t& num) { - return num.str(); - } - inline const std::string to_string(const uinteger_t& num) { - return num.str(); - } -} - -// lhs type T as first arguemnt -// If the output is not a bool, casts to type T - -// Bitwise Operators -template ::value and not std::is_same>::value>> -uinteger_t operator&(const T& lhs, const uinteger_t& rhs) { - return uinteger_t(lhs) & rhs; -} - -template ::value and not std::is_same>::value>> -T& operator&=(T& lhs, const uinteger_t& rhs) { - return lhs = static_cast(rhs & lhs); -} - -template ::value and not std::is_same>::value>> -uinteger_t operator|(const T& lhs, const uinteger_t& rhs) { - return uinteger_t(lhs) | rhs; -} - -template ::value and not std::is_same>::value>> -T& operator|=(T& lhs, const uinteger_t& rhs) { - return lhs = static_cast(rhs | lhs); -} - -template ::value and not std::is_same>::value>> -uinteger_t operator^(const T& lhs, const uinteger_t& rhs) { - return uinteger_t(lhs) ^ rhs; -} - -template ::value and not std::is_same>::value>> -T& operator^=(T& lhs, const uinteger_t& rhs) { - return lhs = static_cast(rhs ^ lhs); -} - -// Bitshift operators -template ::value and not std::is_same>::value>> -inline uinteger_t operator<<(T& lhs, const uinteger_t& rhs) { - return uinteger_t(lhs) << rhs; -} - -template ::value and not 
std::is_same>::value>> -T& operator<<=(T& lhs, const uinteger_t& rhs) { - return lhs = static_cast(lhs << rhs); -} - -template ::value and not std::is_same>::value>> -inline uinteger_t operator>>(T& lhs, const uinteger_t& rhs) { - return uinteger_t(lhs) >> rhs; -} - -template ::value and not std::is_same>::value>> -T& operator>>=(T& lhs, const uinteger_t& rhs) { - return lhs = static_cast(lhs >> rhs); -} - -// Comparison Operators -template ::value and not std::is_same>::value>> -bool operator==(const T& lhs, const uinteger_t& rhs) { - return uinteger_t(lhs) == rhs; -} - -template ::value and not std::is_same>::value>> -bool operator!=(const T& lhs, const uinteger_t& rhs) { - return uinteger_t(lhs) != rhs; -} - -template ::value and not std::is_same>::value>> -bool operator>(const T& lhs, const uinteger_t& rhs) { - return uinteger_t(lhs) > rhs; -} - -template ::value and not std::is_same>::value>> -bool operator<(const T& lhs, const uinteger_t& rhs) { - return uinteger_t(lhs) < rhs; -} - -template ::value and not std::is_same>::value>> -bool operator>=(const T& lhs, const uinteger_t& rhs) { - return uinteger_t(lhs) >= rhs; -} - -template ::value and not std::is_same>::value>> -bool operator<=(const T& lhs, const uinteger_t& rhs) { - return uinteger_t(lhs) <= rhs; -} - -// Arithmetic Operators -template ::value and not std::is_same>::value>> -uinteger_t operator+(const T& lhs, const uinteger_t& rhs) { - return uinteger_t(lhs) + rhs; -} - -template ::value and not std::is_same>::value>> -T& operator+=(T& lhs, const uinteger_t& rhs) { - return lhs = static_cast(rhs + lhs); -} - -template ::value and not std::is_same>::value>> -uinteger_t operator-(const T& lhs, const uinteger_t& rhs) { - return uinteger_t(lhs) - rhs; -} - -template ::value and not std::is_same>::value>> -T& operator-=(T& lhs, const uinteger_t& rhs) { - return lhs = static_cast(lhs - rhs); -} - -template ::value and not std::is_same>::value>> -uinteger_t operator*(const T& lhs, const uinteger_t& rhs) { 
- return uinteger_t(lhs) * rhs; -} - -template ::value and not std::is_same>::value>> -T& operator*=(T& lhs, const uinteger_t& rhs) { - return lhs = static_cast(rhs * lhs); -} - -template ::value and not std::is_same>::value>> -uinteger_t operator/(const T& lhs, const uinteger_t& rhs) { - return uinteger_t(lhs) / rhs; -} - -template ::value and not std::is_same>::value>> -T& operator/=(T& lhs, const uinteger_t& rhs) { - return lhs = static_cast(lhs / rhs); -} - -template ::value and not std::is_same>::value>> -uinteger_t operator%(const T& lhs, const uinteger_t& rhs) { - return uinteger_t(lhs) % rhs; -} - -template ::value and not std::is_same>::value>> -T& operator%=(T& lhs, const uinteger_t& rhs) { - return lhs = static_cast(lhs % rhs); -} - -// IO Operator -inline std::ostream& operator<<(std::ostream& stream, const uinteger_t& rhs) { - if (stream.flags() & stream.oct) { - stream << rhs.str(8); - } else if (stream.flags() & stream.dec) { - stream << rhs.str(10); - } else if (stream.flags() & stream.hex) { - stream << rhs.str(16); - } - return stream; -} - -#endif From b89f01438ef987b42058862bae8aa7cc46277f44 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 25 Jun 2022 03:05:55 +0200 Subject: [PATCH 080/101] Lower mutex scope --- programs/server/embedded.xml | 1 - src/Common/ThreadStatus.cpp | 11 +++++++---- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/programs/server/embedded.xml b/programs/server/embedded.xml index 2b6c4d9f770..0f11efab8a3 100644 --- a/programs/server/embedded.xml +++ b/programs/server/embedded.xml @@ -12,7 +12,6 @@ ./ - 8589934592 true diff --git a/src/Common/ThreadStatus.cpp b/src/Common/ThreadStatus.cpp index b5c2e7be11f..c7d0a42ee79 100644 --- a/src/Common/ThreadStatus.cpp +++ b/src/Common/ThreadStatus.cpp @@ -164,12 +164,15 @@ ThreadStatus::~ThreadStatus() if (thread_group) { - std::lock_guard guard(thread_group->mutex); - 
thread_group->finished_threads_counters_memory.emplace_back(ThreadGroupStatus::ProfileEventsCountersAndMemory{ + ThreadGroupStatus::ProfileEventsCountersAndMemory counters + { performance_counters.getPartiallyAtomicSnapshot(), memory_tracker.get(), - thread_id, - }); + thread_id + }; + + std::lock_guard guard(thread_group->mutex); + thread_group->finished_threads_counters_memory.emplace_back(std::move(counters)); thread_group->threads.erase(this); } From e33f236d502fe9f7906392f4ceffaee44e3c4ce4 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 25 Jun 2022 03:59:01 +0200 Subject: [PATCH 081/101] Slight improvement --- src/Common/ThreadStatus.cpp | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/src/Common/ThreadStatus.cpp b/src/Common/ThreadStatus.cpp index c7d0a42ee79..37331b91d56 100644 --- a/src/Common/ThreadStatus.cpp +++ b/src/Common/ThreadStatus.cpp @@ -148,19 +148,10 @@ ThreadStatus::ThreadStatus() ThreadStatus::~ThreadStatus() { - try - { - if (untracked_memory > 0) - memory_tracker.alloc(untracked_memory); - else - memory_tracker.free(-untracked_memory); - } - catch (const DB::Exception &) - { - /// It's a minor tracked memory leak here (not the memory itself but it's counter). - /// We've already allocated a little bit more than the limit and cannot track it in the thread memory tracker or its parent. 
- tryLogCurrentException(log); - } + if (untracked_memory > 0) + memory_tracker.allocImpl(untracked_memory, false); + else + memory_tracker.free(-untracked_memory); if (thread_group) { From 0e5742fb8e386e5af544b4271b8639bb23d414ad Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 25 Jun 2022 04:28:14 +0200 Subject: [PATCH 082/101] Better hardware benchmark --- benchmark/hardware.sh | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/benchmark/hardware.sh b/benchmark/hardware.sh index a4cafd501e2..ce9b3e7d99d 100755 --- a/benchmark/hardware.sh +++ b/benchmark/hardware.sh @@ -1,6 +1,6 @@ #!/bin/bash -e -TABLE="hits_100m_obfuscated" +TABLE="hits" QUERIES_FILE="queries.sql" TRIES=3 @@ -20,7 +20,7 @@ uptime echo "Starting clickhouse-server" -./clickhouse server > server.log 2>&1 & +./clickhouse server >/dev/null 2>&1 & PID=$! function finish { @@ -37,15 +37,23 @@ for i in {1..30}; do if [[ $i == 30 ]]; then exit 1; fi done -echo "Will download the dataset" -./clickhouse client --max_insert_threads $(nproc || 4) --progress --query " - CREATE OR REPLACE TABLE ${TABLE} ENGINE = MergeTree PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID), EventTime) - AS SELECT * FROM url('https://datasets.clickhouse.com/hits/native/hits_100m_obfuscated_{0..255}.native.zst')" +if [[ $(./clickhouse client --query "EXISTS hits") == '1' && $(./clickhouse client --query "SELECT count() FROM hits") == '100000000' ]]; then + echo "Dataset already downloaded" +else + echo "Will download the dataset" + ./clickhouse client --max_insert_threads $(nproc || 4) --progress --query " + CREATE OR REPLACE TABLE ${TABLE} ENGINE = MergeTree PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID), EventTime) + AS SELECT * FROM url('https://datasets.clickhouse.com/hits/native/hits_100m_obfuscated_{0..255}.native.zst')" -./clickhouse client --query "SELECT 'The dataset size is: ', count() FROM ${TABLE}" 
+ ./clickhouse client --query "SELECT 'The dataset size is: ', count() FROM ${TABLE}" +fi -echo "Will prepare the dataset" -./clickhouse client --query "OPTIMIZE TABLE ${TABLE} FINAL" +if [[ $(./clickhouse client --query "SELECT count() FROM system.parts WHERE table = 'hits' AND database = 'default' AND active") == '1' ]]; then + echo "Dataset already prepared" +else + echo "Will prepare the dataset" + ./clickhouse client --query "OPTIMIZE TABLE ${TABLE} FINAL" +fi echo echo "Will perform benchmark. Results:" From 4c3f42f1c2ff39790437ed0d3500ee28cc9b8c60 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 25 Jun 2022 04:29:43 +0200 Subject: [PATCH 083/101] Better hardware benchmark --- benchmark/hardware.sh | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/benchmark/hardware.sh b/benchmark/hardware.sh index ce9b3e7d99d..374f2c515de 100755 --- a/benchmark/hardware.sh +++ b/benchmark/hardware.sh @@ -1,6 +1,5 @@ #!/bin/bash -e -TABLE="hits" QUERIES_FILE="queries.sql" TRIES=3 @@ -42,24 +41,24 @@ if [[ $(./clickhouse client --query "EXISTS hits") == '1' && $(./clickhouse clie else echo "Will download the dataset" ./clickhouse client --max_insert_threads $(nproc || 4) --progress --query " - CREATE OR REPLACE TABLE ${TABLE} ENGINE = MergeTree PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID), EventTime) + CREATE OR REPLACE TABLE hits ENGINE = MergeTree PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID), EventTime) AS SELECT * FROM url('https://datasets.clickhouse.com/hits/native/hits_100m_obfuscated_{0..255}.native.zst')" - ./clickhouse client --query "SELECT 'The dataset size is: ', count() FROM ${TABLE}" + ./clickhouse client --query "SELECT 'The dataset size is: ', count() FROM hits" fi if [[ $(./clickhouse client --query "SELECT count() FROM system.parts WHERE table = 'hits' AND database = 'default' AND active") == '1' ]]; then echo "Dataset already prepared" else echo 
"Will prepare the dataset" - ./clickhouse client --query "OPTIMIZE TABLE ${TABLE} FINAL" + ./clickhouse client --query "OPTIMIZE TABLE hits FINAL" fi echo echo "Will perform benchmark. Results:" echo -cat "$QUERIES_FILE" | sed "s/{table}/${TABLE}/g" | while read query; do +cat "$QUERIES_FILE" | sed "s/{table}/hits/g" | while read query; do sync if [ "${OS}" = "Darwin" ] then From 5cf6586ed631416bc1fbaad53218b930de2d1e92 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 25 Jun 2022 04:43:07 +0200 Subject: [PATCH 084/101] Add benchmark for c6a_metal --- .../hardware/results/aws_c6a_metal.json | 54 +++++++++++++++++++ 1 file changed, 54 insertions(+) create mode 100644 website/benchmark/hardware/results/aws_c6a_metal.json diff --git a/website/benchmark/hardware/results/aws_c6a_metal.json b/website/benchmark/hardware/results/aws_c6a_metal.json new file mode 100644 index 00000000000..adaf3cce48d --- /dev/null +++ b/website/benchmark/hardware/results/aws_c6a_metal.json @@ -0,0 +1,54 @@ +[ + { + "system": "AWS c6a.metal", + "system_full": "AWS c6a.metal 192 vCPU (96 cores) 384GiB RAM, 200 GB EBS", + "time": "2022-06-25 00:00:00", + "kind": "cloud", + "result": + [ +[0.031, 0.044, 0.001], +[0.050, 0.013, 0.012], +[0.061, 0.017, 0.017], +[0.117, 0.019, 0.021], +[0.280, 0.099, 0.121], +[0.802, 0.113, 0.225], +[0.020, 0.001, 0.001], +[0.024, 0.014, 0.013], +[0.243, 0.116, 0.115], +[0.886, 0.133, 0.129], +[0.325, 0.102, 0.107], +[0.369, 0.093, 0.098], +[0.883, 0.186, 0.178], +[1.532, 0.220, 0.221], +[0.727, 0.208, 0.196], +[0.215, 0.181, 0.184], +[1.676, 0.419, 0.413], +[1.324, 0.306, 0.300], +[3.088, 0.880, 0.765], +[0.119, 0.038, 0.026], +[8.946, 0.218, 0.135], +[9.994, 0.162, 0.134], +[19.043, 0.603, 0.616], +[14.138, 0.257, 0.222], +[1.821, 0.061, 0.048], +[0.856, 0.044, 0.046], +[2.364, 0.064, 0.050], +[9.074, 0.303, 0.291], +[7.130, 0.315, 0.307], +[0.430, 0.400, 0.412], +[1.321, 0.119, 0.114], +[4.649, 0.207, 0.183], +[3.786, 0.861, 0.848], +[8.971, 0.681, 
0.652], +[8.974, 0.661, 0.645], +[0.251, 0.230, 0.223], +[0.112, 0.066, 0.073], +[0.053, 0.029, 0.031], +[0.069, 0.031, 0.028], +[0.230, 0.168, 0.171], +[0.044, 0.018, 0.020], +[0.035, 0.015, 0.020], +[0.021, 0.008, 0.007] + ] + } +] From ff8c52b087c3b10e0e5ad31b65699022a8a38ee3 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 25 Jun 2022 06:11:37 +0300 Subject: [PATCH 085/101] Update partition.md --- docs/en/sql-reference/statements/alter/partition.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/en/sql-reference/statements/alter/partition.md b/docs/en/sql-reference/statements/alter/partition.md index 27178c91de8..d960f057a00 100644 --- a/docs/en/sql-reference/statements/alter/partition.md +++ b/docs/en/sql-reference/statements/alter/partition.md @@ -7,9 +7,9 @@ sidebar_label: PARTITION The following operations with [partitions](../../../engines/table-engines/mergetree-family/custom-partitioning-key.md) are available: -- [DETACH PARTITION\|Part](#detach-partitionpart) — Moves a partition or part to the `detached` directory and forget it. -- [DROP PARTITION\|Part](#drop-partitionpart) — Deletes a partition or part. -- [ATTACH PARTITION\|Part](#attach-partitionpart) — Adds a partition or part from the `detached` directory to the table. +- [DETACH PARTITION\|PART](#detach-partitionpart) — Moves a partition or part to the `detached` directory and forget it. +- [DROP PARTITION\|PART](#drop-partitionpart) — Deletes a partition or part. +- [ATTACH PARTITION\|PART](#attach-partitionpart) — Adds a partition or part from the `detached` directory to the table. - [ATTACH PARTITION FROM](#attach-partition-from) — Copies the data partition from one table to another and adds. - [REPLACE PARTITION](#replace-partition) — Copies the data partition from one table to another and replaces. - [MOVE PARTITION TO TABLE](#move_to_table-partition) — Moves the data partition from one table to another. 
From 0654684bd4eb0b63e58696783734765559318906 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 25 Jun 2022 06:10:50 +0200 Subject: [PATCH 086/101] Fix wrong implementation of filesystem* functions --- src/Functions/filesystem.cpp | 2 +- .../queries/0_stateless/02345_filesystem_local.reference | 1 + tests/queries/0_stateless/02345_filesystem_local.sh | 8 ++++++++ 3 files changed, 10 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/02345_filesystem_local.reference create mode 100755 tests/queries/0_stateless/02345_filesystem_local.sh diff --git a/src/Functions/filesystem.cpp b/src/Functions/filesystem.cpp index bea7ffdf818..36db68617e9 100644 --- a/src/Functions/filesystem.cpp +++ b/src/Functions/filesystem.cpp @@ -36,7 +36,7 @@ public: static FunctionPtr create(ContextPtr context) { - return std::make_shared>(std::filesystem::space(context->getConfigRef().getString("path"))); + return std::make_shared>(std::filesystem::space(context->getPath())); } bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override diff --git a/tests/queries/0_stateless/02345_filesystem_local.reference b/tests/queries/0_stateless/02345_filesystem_local.reference new file mode 100644 index 00000000000..9972842f982 --- /dev/null +++ b/tests/queries/0_stateless/02345_filesystem_local.reference @@ -0,0 +1 @@ +1 1 diff --git a/tests/queries/0_stateless/02345_filesystem_local.sh b/tests/queries/0_stateless/02345_filesystem_local.sh new file mode 100755 index 00000000000..6771df2ae2d --- /dev/null +++ b/tests/queries/0_stateless/02345_filesystem_local.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +# Checks that these functions are working inside clickhouse-local. Does not check specific values. 
+$CLICKHOUSE_LOCAL --query "SELECT filesystemAvailable() > 0, filesystemFree() <= filesystemCapacity()" From a45e3d47adacc59699de26b59e5966307b97b8fb Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 25 Jun 2022 07:16:25 +0200 Subject: [PATCH 087/101] Remove useless codec from system.asynchronous_metric_log --- src/Interpreters/AsynchronousMetricLog.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/AsynchronousMetricLog.h b/src/Interpreters/AsynchronousMetricLog.h index 836e967a7a6..900d84868bd 100644 --- a/src/Interpreters/AsynchronousMetricLog.h +++ b/src/Interpreters/AsynchronousMetricLog.h @@ -40,7 +40,7 @@ struct AsynchronousMetricLogElement return "event_date Date CODEC(Delta(2), ZSTD(1)), " "event_time DateTime CODEC(Delta(4), ZSTD(1)), " "metric LowCardinality(String) CODEC(ZSTD(1)), " - "value Float64 CODEC(Gorilla, ZSTD(3))"; + "value Float64 CODEC(ZSTD(3))"; } }; From ccb4802ab10218523d4249e705decb05edf23945 Mon Sep 17 00:00:00 2001 From: xinhuitian Date: Sat, 25 Jun 2022 14:01:41 +0800 Subject: [PATCH 088/101] fix some wrong links in alter docs --- docs/en/sql-reference/statements/alter/column.md | 2 +- docs/en/sql-reference/statements/alter/partition.md | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/en/sql-reference/statements/alter/column.md b/docs/en/sql-reference/statements/alter/column.md index 2a5e36eaa00..9387b442944 100644 --- a/docs/en/sql-reference/statements/alter/column.md +++ b/docs/en/sql-reference/statements/alter/column.md @@ -24,7 +24,7 @@ The following actions are supported: - [CLEAR COLUMN](#clear-column) — Resets column values. - [COMMENT COLUMN](#comment-column) — Adds a text comment to the column. - [MODIFY COLUMN](#modify-column) — Changes column’s type, default expression and TTL. -- [MODIFY COLUMN REMOVE](#modify-remove) — Removes one of the column properties. +- [MODIFY COLUMN REMOVE](#modify-column-remove) — Removes one of the column properties. 
- [MATERIALIZE COLUMN](#materialize-column) — Materializes the column in the parts where the column is missing. These actions are described in detail below. diff --git a/docs/en/sql-reference/statements/alter/partition.md b/docs/en/sql-reference/statements/alter/partition.md index d960f057a00..921ffb71066 100644 --- a/docs/en/sql-reference/statements/alter/partition.md +++ b/docs/en/sql-reference/statements/alter/partition.md @@ -12,13 +12,13 @@ The following operations with [partitions](../../../engines/table-engines/merget - [ATTACH PARTITION\|PART](#attach-partitionpart) — Adds a partition or part from the `detached` directory to the table. - [ATTACH PARTITION FROM](#attach-partition-from) — Copies the data partition from one table to another and adds. - [REPLACE PARTITION](#replace-partition) — Copies the data partition from one table to another and replaces. -- [MOVE PARTITION TO TABLE](#move_to_table-partition) — Moves the data partition from one table to another. -- [CLEAR COLUMN IN PARTITION](#clear-column-partition) — Resets the value of a specified column in a partition. -- [CLEAR INDEX IN PARTITION](#clear-index-partition) — Resets the specified secondary index in a partition. +- [MOVE PARTITION TO TABLE](#move-partition-to-table) — Moves the data partition from one table to another. +- [CLEAR COLUMN IN PARTITION](#clear-column-in-partition) — Resets the value of a specified column in a partition. +- [CLEAR INDEX IN PARTITION](#clear-index-in-partition) — Resets the specified secondary index in a partition. - [FREEZE PARTITION](#freeze-partition) — Creates a backup of a partition. - [UNFREEZE PARTITION](#unfreeze-partition) — Removes a backup of a partition. -- [FETCH PARTITION\|PART](#fetch-partition) — Downloads a part or partition from another server. -- [MOVE PARTITION\|PART](#move-partition) — Move partition/data part to another disk or volume. +- [FETCH PARTITION\|PART](#fetch-partitionpart) — Downloads a part or partition from another server. 
+- [MOVE PARTITION\|PART](#move-partitionpart) — Move partition/data part to another disk or volume. - [UPDATE IN PARTITION](#update-in-partition) — Update data inside the partition by condition. - [DELETE IN PARTITION](#delete-in-partition) — Delete data inside the partition by condition. From 9bc23f579e6aa3b2b5317a8512fb3355d9704931 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 25 Jun 2022 06:07:41 +0200 Subject: [PATCH 089/101] Automated hardware benchmark --- benchmark/hardware.sh | 61 +++++++++++++++++++++++++++++++++---------- 1 file changed, 47 insertions(+), 14 deletions(-) diff --git a/benchmark/hardware.sh b/benchmark/hardware.sh index 374f2c515de..c67ba2bb09c 100755 --- a/benchmark/hardware.sh +++ b/benchmark/hardware.sh @@ -58,6 +58,7 @@ echo echo "Will perform benchmark. Results:" echo +QUERY_NUM=1 cat "$QUERIES_FILE" | sed "s/{table}/hits/g" | while read query; do sync if [ "${OS}" = "Darwin" ] @@ -72,8 +73,12 @@ cat "$QUERIES_FILE" | sed "s/{table}/hits/g" | while read query; do RES=$(./clickhouse client --time --format=Null --query="$query" 2>&1 ||:) [[ "$?" == "0" ]] && echo -n "${RES}" || echo -n "null" [[ "$i" != $TRIES ]] && echo -n ", " + + echo "${QUERY_NUM},${i},${RES}" >> result.csv done echo "]," + + QUERY_NUM=$((QUERY_NUM + 1)) done @@ -81,22 +86,23 @@ echo echo "Benchmark complete. 
System info:" echo +touch {cpu_model,cpu,df,memory,memory_total,blk,mdstat,instance}.txt + if [ "${OS}" = "Darwin" ] then echo '----Version, build id-----------' ./clickhouse local --query "SELECT format('Version: {}', version())" - sw_vers | grep BuildVersion ./clickhouse local --query "SELECT format('The number of threads is: {}', value) FROM system.settings WHERE name = 'max_threads'" --output-format TSVRaw ./clickhouse local --query "SELECT format('Current time: {}', toString(now(), 'UTC'))" echo '----CPU-------------------------' - sysctl hw.model - sysctl -a | grep -E 'hw.activecpu|hw.memsize|hw.byteorder|cachesize' + sysctl hw.model | tee cpu_model.txt + sysctl -a | grep -E 'hw.activecpu|hw.memsize|hw.byteorder|cachesize' | tee cpu.txt echo '----Disk Free and Total--------' - df -h . + df -h . | tee df.txt echo '----Memory Free and Total-------' - vm_stat + vm_stat | tee memory.txt echo '----Physical Memory Amount------' - ls -l /var/vm + ls -l /var/vm | tee memory_total.txt echo '--------------------------------' else echo '----Version, build id-----------' @@ -104,22 +110,49 @@ else ./clickhouse local --query "SELECT format('The number of threads is: {}', value) FROM system.settings WHERE name = 'max_threads'" --output-format TSVRaw ./clickhouse local --query "SELECT format('Current time: {}', toString(now(), 'UTC'))" echo '----CPU-------------------------' - cat /proc/cpuinfo | grep -i -F 'model name' | uniq - lscpu + cat /proc/cpuinfo | grep -i -F 'model name' | uniq | tee cpu_model.txt + lscpu | tee cpu.txt echo '----Block Devices---------------' - lsblk + lsblk | tee blk.txt echo '----Disk Free and Total--------' - df -h . + df -h . 
| tee df.txt echo '----Memory Free and Total-------' - free -h + free -h | tee memory.txt echo '----Physical Memory Amount------' - cat /proc/meminfo | grep MemTotal + cat /proc/meminfo | grep MemTotal | tee memory_total.txt echo '----RAID Info-------------------' - cat /proc/mdstat + cat /proc/mdstat| tee mdstat.txt echo '--------------------------------' fi echo echo "Instance type from IMDS (if available):" -curl --connect-timeout 1 http://169.254.169.254/latest/meta-data/instance-type +curl -s --connect-timeout 1 'http://169.254.169.254/latest/meta-data/instance-type' | tee instance.txt echo + +echo "Uploading the results (if possible)" + +./clickhouse local --query " + SELECT + (SELECT generateUUIDv4()) AS test_id, + c1 AS query_num, + c2 AS try_num, + c3 AS time, + version() AS version, + now() AS test_time, + (SELECT value FROM system.settings WHERE name = 'max_threads') AS threads, + filesystemCapacity() AS fs_capacity, + filesystemAvailable() AS fs_available, + file('cpu_model.txt') AS cpu_model, + file('cpu.txt') AS cpu, + file('df.txt') AS df, + file('memory.txt') AS memory, + file('memory_total.txt') AS memory_total, + file('blk.txt') AS blk, + file('mdstat.txt') AS mdstat, + file('instance.txt') AS instance + FROM file('result.csv') +" | tee upload.tsv | ./clickhouse client --host play.clickhouse.com --secure --user benchmark --query " + INSERT INTO hardware_benchmark_results + (test_id, query_num, try_num, time, version, test_time, threads, fs_capacity, fs_available, cpu_model, cpu, df, memory, memory_total, blk, mdstat, instance) + FORMAT TSV" From d4b9d5f067cf388a1d4497ecf4689c38f3ff53d9 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 25 Jun 2022 07:05:56 +0200 Subject: [PATCH 090/101] Automatic upload from hardware benchmark --- benchmark/benchmark.sh | 180 --------------------------------------- benchmark/create_dump.sh | 3 - benchmark/hardware.sh | 72 ++++++++++++++-- 3 files changed, 64 insertions(+), 191 deletions(-) delete mode 
100755 benchmark/benchmark.sh delete mode 100755 benchmark/create_dump.sh diff --git a/benchmark/benchmark.sh b/benchmark/benchmark.sh deleted file mode 100755 index 5fea1bfbb88..00000000000 --- a/benchmark/benchmark.sh +++ /dev/null @@ -1,180 +0,0 @@ -#!/usr/bin/env bash -# script to run query to databases - -function usage() -{ - cat < /proc/sys/vm/drop_caches" - - if [[ "$restart_server_each_query" == "1" && "$use_service" == "1" ]]; then - echo "restart server: $etc_init_d_service restart" - sudo $etc_init_d_service restart - fi - - for i in $(seq $TIMES) - do - if [[ -f $etc_init_d_service && "$use_service" == "1" ]]; then - sudo $etc_init_d_service status - server_status=$? - expect -f $expect_file "" - - if [[ "$?" != "0" || $server_status != "0" ]]; then - echo "restart server: $etc_init_d_service restart" - sudo $etc_init_d_service restart - fi - - #wait until can connect to server - restart_timer=0 - restart_limit=60 - expect -f $expect_file "" &> /dev/null - while [ "$?" != "0" ]; do - echo "waiting" - sleep 1 - let "restart_timer = $restart_timer + 1" - if (( $restart_limit < $restart_timer )); then - sudo $etc_init_d_service restart - restart_timer=0 - fi - expect -f $expect_file "" &> /dev/null - done - fi - - echo - echo "times: $i" - - echo "query:" "$query" - expect -f $expect_file "$query" - - done - fi - - let "index = $index + 1" - done -} - -temp_test_file=temp_queries_$table_name -cat $test_file | sed s/$table_name_pattern/$table_name/g > $temp_test_file -mapfile -t test_queries < $temp_test_file - -echo "start time: $(date)" -time execute "${test_queries[@]}" -echo "stop time: $(date)" diff --git a/benchmark/create_dump.sh b/benchmark/create_dump.sh deleted file mode 100755 index 3e26f8c1426..00000000000 --- a/benchmark/create_dump.sh +++ /dev/null @@ -1,3 +0,0 @@ -#!/usr/bin/env bash - -table=hits_10m; time clickhouse-client --max_bytes_before_external_sort=30000000000 --query="SELECT toInt64(WatchID), JavaEnable, Title, GoodEvent, 
(EventTime < toDateTime('1971-01-01 00:00:00') ? toDateTime('1971-01-01 00:00:01') : EventTime), (EventDate < toDate('1971-01-01') ? toDate('1971-01-01') : EventDate), CounterID, ClientIP, RegionID, toInt64(UserID), CounterClass, OS, UserAgent, URL, Referer, Refresh, RefererCategoryID, RefererRegionID, URLCategoryID, URLRegionID, ResolutionWidth, ResolutionHeight, ResolutionDepth, FlashMajor, FlashMinor, FlashMinor2, NetMajor, NetMinor, UserAgentMajor, UserAgentMinor, CookieEnable, JavascriptEnable, IsMobile, MobilePhone, MobilePhoneModel, Params, IPNetworkID, TraficSourceID, SearchEngineID, SearchPhrase, AdvEngineID, IsArtifical, WindowClientWidth, WindowClientHeight, ClientTimeZone, (ClientEventTime < toDateTime('1971-01-01 00:00:01') ? toDateTime('1971-01-01 00:00:01') : ClientEventTime), SilverlightVersion1, SilverlightVersion2, SilverlightVersion3, SilverlightVersion4, PageCharset, CodeVersion, IsLink, IsDownload, IsNotBounce, toInt64(FUniqID), OriginalURL, HID, IsOldCounter, IsEvent, IsParameter, DontCountHits, WithHash, HitColor, (LocalEventTime < toDateTime('1971-01-01 00:00:01') ? toDateTime('1971-01-01 00:00:01') : LocalEventTime), Age, Sex, Income, Interests, Robotness, RemoteIP, WindowName, OpenerName, HistoryLength, BrowserLanguage, BrowserCountry, SocialNetwork, SocialAction, HTTPError, SendTiming, DNSTiming, ConnectTiming, ResponseStartTiming, ResponseEndTiming, FetchTiming, SocialSourceNetworkID, SocialSourcePage, ParamPrice, ParamOrderID, ParamCurrency, ParamCurrencyID, OpenstatServiceName, OpenstatCampaignID, OpenstatAdID, OpenstatSourceID, UTMSource, UTMMedium, UTMCampaign, UTMContent, UTMTerm, FromTag, HasGCLID, toInt64(RefererHash), toInt64(URLHash), CLID FROM $table ORDER BY rand()" | corrector_utf8 > /opt/dumps/${table}_corrected.tsv diff --git a/benchmark/hardware.sh b/benchmark/hardware.sh index c67ba2bb09c..5b8b822c25c 100755 --- a/benchmark/hardware.sh +++ b/benchmark/hardware.sh @@ -58,7 +58,9 @@ echo echo "Will perform benchmark. 
Results:" echo +>result.csv QUERY_NUM=1 + cat "$QUERIES_FILE" | sed "s/{table}/hits/g" | while read query; do sync if [ "${OS}" = "Darwin" ] @@ -132,12 +134,11 @@ echo echo "Uploading the results (if possible)" +UUID=$(clickhouse-local --query "SELECT generateUUIDv4()") + ./clickhouse local --query " SELECT - (SELECT generateUUIDv4()) AS test_id, - c1 AS query_num, - c2 AS try_num, - c3 AS time, + '${UUID}' AS run_id, version() AS version, now() AS test_time, (SELECT value FROM system.settings WHERE name = 'max_threads') AS threads, @@ -151,8 +152,63 @@ echo "Uploading the results (if possible)" file('blk.txt') AS blk, file('mdstat.txt') AS mdstat, file('instance.txt') AS instance +" | tee meta.tsv | ./clickhouse client --host play.clickhouse.com --secure --user benchmark --query " + INSERT INTO benchmark_runs + (run_id, version, test_time, threads, fs_capacity, fs_available, cpu_model, cpu, df, memory, memory_total, blk, mdstat, instance) + FORMAT TSV" || echo "Cannot upload results." + +./clickhouse local --query " + SELECT + '${UUID}' AS run_id, + c1 AS query_num, + c2 AS try_num, + c3 AS time FROM file('result.csv') -" | tee upload.tsv | ./clickhouse client --host play.clickhouse.com --secure --user benchmark --query " - INSERT INTO hardware_benchmark_results - (test_id, query_num, try_num, time, version, test_time, threads, fs_capacity, fs_available, cpu_model, cpu, df, memory, memory_total, blk, mdstat, instance) - FORMAT TSV" +" | tee results.tsv | ./clickhouse client --host play.clickhouse.com --secure --user benchmark --query " + INSERT INTO benchmark_results + (run_id, query_num, try_num, time) + FORMAT TSV" || echo "Cannot upload results. 
Please send the output to feedback@clickhouse.com" + +< Date: Sat, 25 Jun 2022 07:08:37 +0200 Subject: [PATCH 091/101] Slightly better --- benchmark/hardware.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/benchmark/hardware.sh b/benchmark/hardware.sh index 5b8b822c25c..0e6e4db499f 100755 --- a/benchmark/hardware.sh +++ b/benchmark/hardware.sh @@ -40,7 +40,7 @@ if [[ $(./clickhouse client --query "EXISTS hits") == '1' && $(./clickhouse clie echo "Dataset already downloaded" else echo "Will download the dataset" - ./clickhouse client --max_insert_threads $(nproc || 4) --progress --query " + ./clickhouse client --receive_timeout 1000 --max_insert_threads $(nproc || 4) --progress --query " CREATE OR REPLACE TABLE hits ENGINE = MergeTree PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID), EventTime) AS SELECT * FROM url('https://datasets.clickhouse.com/hits/native/hits_100m_obfuscated_{0..255}.native.zst')" @@ -51,7 +51,7 @@ if [[ $(./clickhouse client --query "SELECT count() FROM system.parts WHERE tabl echo "Dataset already prepared" else echo "Will prepare the dataset" - ./clickhouse client --query "OPTIMIZE TABLE hits FINAL" + ./clickhouse client --receive_timeout 1000 --query "OPTIMIZE TABLE hits FINAL" fi echo From 756fd0dac89337aefa063fd863ad6b9ea2ce5994 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 25 Jun 2022 18:27:26 +0200 Subject: [PATCH 092/101] Automated hardware benchmark --- benchmark/hardware.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmark/hardware.sh b/benchmark/hardware.sh index 0e6e4db499f..e1d5c56b5a8 100755 --- a/benchmark/hardware.sh +++ b/benchmark/hardware.sh @@ -203,7 +203,7 @@ CREATE USER benchmark IDENTIFIED WITH no_password SETTINGS max_rows_to_read = 1, CREATE QUOTA benchmark KEYED BY ip_address -FOR RANDOMIZED INTERVAL 1 MINUTE MAX query_inserts = 1, written_bytes = 100000, +FOR RANDOMIZED INTERVAL 1 MINUTE MAX query_inserts = 4, 
written_bytes = 100000, FOR RANDOMIZED INTERVAL 1 HOUR MAX query_inserts = 10, written_bytes = 500000, FOR RANDOMIZED INTERVAL 1 DAY MAX query_inserts = 50, written_bytes = 2000000 TO benchmark; From 37903bfee00e20dd1721d9ad22e60570c90be1d5 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 25 Jun 2022 23:18:33 +0200 Subject: [PATCH 093/101] Fix after #38427 --- benchmark/hardware.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmark/hardware.sh b/benchmark/hardware.sh index e1d5c56b5a8..e8c9c58aca3 100755 --- a/benchmark/hardware.sh +++ b/benchmark/hardware.sh @@ -134,7 +134,7 @@ echo echo "Uploading the results (if possible)" -UUID=$(clickhouse-local --query "SELECT generateUUIDv4()") +UUID=$(./clickhouse local --query "SELECT generateUUIDv4()") ./clickhouse local --query " SELECT From 07f14c9bb295a054f7c3cfd07b66ee2c78eb8025 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Sun, 26 Jun 2022 00:03:01 +0200 Subject: [PATCH 094/101] Disable vectorscan-on-ARM for now --- contrib/vectorscan-cmake/CMakeLists.txt | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/contrib/vectorscan-cmake/CMakeLists.txt b/contrib/vectorscan-cmake/CMakeLists.txt index 140c174cd73..480ab3a384f 100644 --- a/contrib/vectorscan-cmake/CMakeLists.txt +++ b/contrib/vectorscan-cmake/CMakeLists.txt @@ -1,9 +1,11 @@ # We use vectorscan, a portable and API/ABI-compatible drop-in replacement for hyperscan. -if (ARCH_AMD64 OR ARCH_AARCH64) +if (ARCH_AMD64) option (ENABLE_VECTORSCAN "Enable vectorscan library" ${ENABLE_LIBRARIES}) endif() +# TODO: vectorscan supports ARM yet some tests involving cyrillic letters fail (PR #38171) ... 
needs further investigation + # TODO PPC should generally work but needs manual generation of ppc/config.h file on a PPC machine if (NOT ENABLE_VECTORSCAN) From 25cc406201f93b666dbb03771a0123a4820b7650 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 26 Jun 2022 01:29:06 +0300 Subject: [PATCH 095/101] Update src/Common/ThreadStatus.cpp Co-authored-by: Dmitry Novik --- src/Common/ThreadStatus.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Common/ThreadStatus.cpp b/src/Common/ThreadStatus.cpp index 37331b91d56..a4f99c1be1a 100644 --- a/src/Common/ThreadStatus.cpp +++ b/src/Common/ThreadStatus.cpp @@ -149,7 +149,7 @@ ThreadStatus::ThreadStatus() ThreadStatus::~ThreadStatus() { if (untracked_memory > 0) - memory_tracker.allocImpl(untracked_memory, false); + memory_tracker.allocNoThrow(untracked_memory); else memory_tracker.free(-untracked_memory); From 725d80d470398f81e67fa9b89f824ec8d862805f Mon Sep 17 00:00:00 2001 From: kssenii Date: Sat, 25 Jun 2022 20:30:36 +0200 Subject: [PATCH 096/101] get rid of path separation --- src/Disks/DiskDecorator.h | 2 +- src/Disks/DiskRestartProxy.cpp | 4 +- src/Disks/DiskRestartProxy.h | 2 +- src/Disks/DiskWebServer.cpp | 4 +- src/Disks/DiskWebServer.h | 2 +- src/Disks/IDisk.h | 6 +-- src/Disks/IO/ReadBufferFromRemoteFSGather.cpp | 37 +++++++++++----- src/Disks/IO/ReadBufferFromRemoteFSGather.h | 32 +++++--------- src/Disks/IO/ReadBufferFromWebServer.h | 3 +- .../AzureBlobStorage/AzureObjectStorage.cpp | 20 +++++---- .../AzureBlobStorage/AzureObjectStorage.h | 22 +++++++--- .../ObjectStorages/DiskObjectStorage.cpp | 16 ++++--- src/Disks/ObjectStorages/DiskObjectStorage.h | 2 +- .../DiskObjectStorageMetadata.cpp | 18 ++++---- .../DiskObjectStorageMetadata.h | 8 ++-- ...jectStorageRemoteMetadataRestoreHelper.cpp | 14 +++--- .../DiskObjectStorageTransaction.cpp | 31 ++++++------- .../ObjectStorages/HDFS/HDFSObjectStorage.cpp | 31 +++++++------ .../ObjectStorages/HDFS/HDFSObjectStorage.h | 25 
+++++++---- src/Disks/ObjectStorages/IMetadataStorage.h | 8 ++-- src/Disks/ObjectStorages/IObjectStorage.h | 43 +++++++++++-------- .../MetadataStorageFromDisk.cpp | 20 +++------ .../ObjectStorages/MetadataStorageFromDisk.h | 4 +- .../ObjectStorages/S3/S3ObjectStorage.cpp | 25 ++++++----- src/Disks/ObjectStorages/S3/S3ObjectStorage.h | 36 +++++++++++----- .../System/StorageSystemRemoteDataPaths.cpp | 4 +- 26 files changed, 233 insertions(+), 186 deletions(-) diff --git a/src/Disks/DiskDecorator.h b/src/Disks/DiskDecorator.h index 08a11cd3f03..e17a5aff3c7 100644 --- a/src/Disks/DiskDecorator.h +++ b/src/Disks/DiskDecorator.h @@ -75,7 +75,7 @@ public: void startup(ContextPtr context) override; void applyNewSettings(const Poco::Util::AbstractConfiguration & config, ContextPtr context, const String & config_prefix, const DisksMap & map) override; String getCacheBasePath() const override { return delegate->getCacheBasePath(); } - std::vector getRemotePaths(const String & path) const override { return delegate->getRemotePaths(path); } + PathsWithSize getObjectStoragePaths(const String & path) const override { return delegate->getObjectStoragePaths(path); } void getRemotePathsRecursive(const String & path, std::vector & paths_map) override { return delegate->getRemotePathsRecursive(path, paths_map); } MetadataStoragePtr getMetadataStorage() override { return delegate->getMetadataStorage(); } diff --git a/src/Disks/DiskRestartProxy.cpp b/src/Disks/DiskRestartProxy.cpp index 674179fa4a0..99dfc8e96a0 100644 --- a/src/Disks/DiskRestartProxy.cpp +++ b/src/Disks/DiskRestartProxy.cpp @@ -318,10 +318,10 @@ String DiskRestartProxy::getCacheBasePath() const return DiskDecorator::getCacheBasePath(); } -std::vector DiskRestartProxy::getRemotePaths(const String & path) const +PathsWithSize DiskRestartProxy::getObjectStoragePaths(const String & path) const { ReadLock lock (mutex); - return DiskDecorator::getRemotePaths(path); + return DiskDecorator::getObjectStoragePaths(path); } void 
DiskRestartProxy::getRemotePathsRecursive(const String & path, std::vector & paths_map) diff --git a/src/Disks/DiskRestartProxy.h b/src/Disks/DiskRestartProxy.h index 52d68806ab0..e483936c817 100644 --- a/src/Disks/DiskRestartProxy.h +++ b/src/Disks/DiskRestartProxy.h @@ -65,7 +65,7 @@ public: String getUniqueId(const String & path) const override; bool checkUniqueId(const String & id) const override; String getCacheBasePath() const override; - std::vector getRemotePaths(const String & path) const override; + PathsWithSize getObjectStoragePaths(const String & path) const override; void getRemotePathsRecursive(const String & path, std::vector & paths_map) override; void restart(ContextPtr context); diff --git a/src/Disks/DiskWebServer.cpp b/src/Disks/DiskWebServer.cpp index 64c020511a4..83cfbbb0150 100644 --- a/src/Disks/DiskWebServer.cpp +++ b/src/Disks/DiskWebServer.cpp @@ -170,10 +170,10 @@ std::unique_ptr DiskWebServer::readFile(const String & p auto remote_path = fs_path.parent_path() / (escapeForFileName(fs_path.stem()) + fs_path.extension().string()); remote_path = remote_path.string().substr(url.size()); - std::vector blobs_to_read; + PathsWithSize blobs_to_read; blobs_to_read.emplace_back(remote_path, iter->second.size); - auto web_impl = std::make_unique(url, path, blobs_to_read, getContext(), read_settings); + auto web_impl = std::make_unique(url, blobs_to_read, getContext(), read_settings); if (read_settings.remote_fs_method == RemoteFSReadMethod::threadpool) { diff --git a/src/Disks/DiskWebServer.h b/src/Disks/DiskWebServer.h index 6182c97f70b..40754e71fa0 100644 --- a/src/Disks/DiskWebServer.h +++ b/src/Disks/DiskWebServer.h @@ -169,7 +169,7 @@ public: throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Disk {} is read-only", getName()); } - std::vector getRemotePaths(const String &) const override { return {}; } + PathsWithSize getObjectStoragePaths(const String &) const override { return {}; } void getRemotePathsRecursive(const String &, std::vector &) 
override {} diff --git a/src/Disks/IDisk.h b/src/Disks/IDisk.h index f441f0827fb..27cac7a5456 100644 --- a/src/Disks/IDisk.h +++ b/src/Disks/IDisk.h @@ -219,13 +219,13 @@ public: /// Returns a list of paths because for Log family engines there might be /// multiple files in remote fs for single clickhouse file. - virtual std::vector getRemotePaths(const String &) const + virtual PathsWithSize getObjectStoragePaths(const String &) const { - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method `getRemotePaths() not implemented for disk: {}`", getType()); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method `getObjectStoragePaths() not implemented for disk: {}`", getType()); } /// For one local path there might be multiple remote paths in case of Log family engines. - using LocalPathWithRemotePaths = std::pair>; + using LocalPathWithRemotePaths = std::pair; virtual void getRemotePathsRecursive(const String &, std::vector &) { diff --git a/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp b/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp index 19f3a12b38d..804fb69a8f5 100644 --- a/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp +++ b/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp @@ -40,7 +40,7 @@ SeekableReadBufferPtr ReadBufferFromRemoteFSGather::createImplementationBuffer(c appendFilesystemCacheLog(); } - current_file_path = fs::path(common_path_prefix) / path; + current_file_path = path; current_file_size = file_size; total_bytes_read_from_current_file = 0; @@ -50,18 +50,30 @@ SeekableReadBufferPtr ReadBufferFromRemoteFSGather::createImplementationBuffer(c #if USE_AWS_S3 SeekableReadBufferPtr ReadBufferFromS3Gather::createImplementationBufferImpl(const String & path, size_t file_size) { - auto remote_path = fs::path(common_path_prefix) / path; auto remote_file_reader_creator = [=, this]() { return std::make_unique( - client_ptr, bucket, remote_path, version_id, max_single_read_retries, - settings, /* use_external_buffer */true, /* offset */ 0, read_until_position, /* 
restricted_seek */true); + client_ptr, + bucket, + path, + version_id, + max_single_read_retries, + settings, + /* use_external_buffer */true, + /* offset */0, + read_until_position, + /* restricted_seek */true); }; if (with_cache) { return std::make_shared( - remote_path, settings.remote_fs_cache, remote_file_reader_creator, settings, query_id, read_until_position ? read_until_position : file_size); + path, + settings.remote_fs_cache, + remote_file_reader_creator, + settings, + query_id, + read_until_position ? read_until_position : file_size); } return remote_file_reader_creator(); @@ -82,24 +94,27 @@ SeekableReadBufferPtr ReadBufferFromAzureBlobStorageGather::createImplementation SeekableReadBufferPtr ReadBufferFromWebServerGather::createImplementationBufferImpl(const String & path, size_t /* file_size */) { current_file_path = path; - return std::make_unique(fs::path(uri) / path, context, settings, /* use_external_buffer */true, read_until_position); + return std::make_unique( + path, + context, + settings, + /* use_external_buffer */true, + read_until_position); } #if USE_HDFS SeekableReadBufferPtr ReadBufferFromHDFSGather::createImplementationBufferImpl(const String & path, size_t /* file_size */) { - return std::make_unique(hdfs_uri, fs::path(hdfs_directory) / path, config, settings.remote_fs_buffer_size); + return std::make_unique(hdfs_uri, path, config); } #endif ReadBufferFromRemoteFSGather::ReadBufferFromRemoteFSGather( - const std::string & common_path_prefix_, - const BlobsPathToSize & blobs_to_read_, + const PathsWithSize & blobs_to_read_, const ReadSettings & settings_) : ReadBuffer(nullptr, 0) - , common_path_prefix(common_path_prefix_) , blobs_to_read(blobs_to_read_) , settings(settings_) , query_id(CurrentThread::isInitialized() && CurrentThread::get().getQueryContext() != nullptr ? 
CurrentThread::getQueryId() : "") @@ -295,7 +310,7 @@ size_t ReadBufferFromRemoteFSGather::getFileSize() const { size_t size = 0; for (const auto & object : blobs_to_read) - size += object.bytes_size; + size += object.size; return size; } diff --git a/src/Disks/IO/ReadBufferFromRemoteFSGather.h b/src/Disks/IO/ReadBufferFromRemoteFSGather.h index d6b1f9d9479..8edacfd36c9 100644 --- a/src/Disks/IO/ReadBufferFromRemoteFSGather.h +++ b/src/Disks/IO/ReadBufferFromRemoteFSGather.h @@ -27,8 +27,7 @@ friend class ReadIndirectBufferFromRemoteFS; public: ReadBufferFromRemoteFSGather( - const std::string & common_path_prefix_, - const BlobsPathToSize & blobs_to_read_, + const PathsWithSize & blobs_to_read_, const ReadSettings & settings_); ~ReadBufferFromRemoteFSGather() override; @@ -54,9 +53,7 @@ public: protected: virtual SeekableReadBufferPtr createImplementationBufferImpl(const String & path, size_t file_size) = 0; - std::string common_path_prefix; - - BlobsPathToSize blobs_to_read; + PathsWithSize blobs_to_read; ReadSettings settings; @@ -112,11 +109,10 @@ public: std::shared_ptr client_ptr_, const String & bucket_, const String & version_id_, - const std::string & common_path_prefix_, - const BlobsPathToSize & blobs_to_read_, + const PathsWithSize & blobs_to_read_, size_t max_single_read_retries_, const ReadSettings & settings_) - : ReadBufferFromRemoteFSGather(common_path_prefix_, blobs_to_read_, settings_) + : ReadBufferFromRemoteFSGather(blobs_to_read_, settings_) , client_ptr(std::move(client_ptr_)) , bucket(bucket_) , version_id(version_id_) @@ -142,12 +138,11 @@ class ReadBufferFromAzureBlobStorageGather final : public ReadBufferFromRemoteFS public: ReadBufferFromAzureBlobStorageGather( std::shared_ptr blob_container_client_, - const std::string & common_path_prefix_, - const BlobsPathToSize & blobs_to_read_, + const PathsWithSize & blobs_to_read_, size_t max_single_read_retries_, size_t max_single_download_retries_, const ReadSettings & settings_) - : 
ReadBufferFromRemoteFSGather(common_path_prefix_, blobs_to_read_, settings_) + : ReadBufferFromRemoteFSGather(blobs_to_read_, settings_) , blob_container_client(blob_container_client_) , max_single_read_retries(max_single_read_retries_) , max_single_download_retries(max_single_download_retries_) @@ -169,11 +164,10 @@ class ReadBufferFromWebServerGather final : public ReadBufferFromRemoteFSGather public: ReadBufferFromWebServerGather( const String & uri_, - const std::string & common_path_prefix_, - const BlobsPathToSize & blobs_to_read_, + const PathsWithSize & blobs_to_read_, ContextPtr context_, const ReadSettings & settings_) - : ReadBufferFromRemoteFSGather(common_path_prefix_, blobs_to_read_, settings_) + : ReadBufferFromRemoteFSGather(blobs_to_read_, settings_) , uri(uri_) , context(context_) { @@ -195,15 +189,12 @@ public: ReadBufferFromHDFSGather( const Poco::Util::AbstractConfiguration & config_, const String & hdfs_uri_, - const std::string & common_path_prefix_, - const BlobsPathToSize & blobs_to_read_, + const PathsWithSize & blobs_to_read_, const ReadSettings & settings_) - : ReadBufferFromRemoteFSGather(common_path_prefix_, blobs_to_read_, settings_) + : ReadBufferFromRemoteFSGather(blobs_to_read_, settings_) , config(config_) + , hdfs_uri(hdfs_uri_) { - const size_t begin_of_path = hdfs_uri_.find('/', hdfs_uri_.find("//") + 2); - hdfs_directory = hdfs_uri_.substr(begin_of_path); - hdfs_uri = hdfs_uri_.substr(0, begin_of_path); } SeekableReadBufferPtr createImplementationBufferImpl(const String & path, size_t file_size) override; @@ -211,7 +202,6 @@ public: private: const Poco::Util::AbstractConfiguration & config; String hdfs_uri; - String hdfs_directory; }; #endif diff --git a/src/Disks/IO/ReadBufferFromWebServer.h b/src/Disks/IO/ReadBufferFromWebServer.h index ea746fb75a1..1e4219d53ee 100644 --- a/src/Disks/IO/ReadBufferFromWebServer.h +++ b/src/Disks/IO/ReadBufferFromWebServer.h @@ -19,7 +19,8 @@ class ReadBufferFromWebServer : public 
SeekableReadBuffer { public: explicit ReadBufferFromWebServer( - const String & url_, ContextPtr context_, + const String & url_, + ContextPtr context_, const ReadSettings & settings_ = {}, bool use_external_buffer_ = false, size_t read_until_position = 0); diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp index 4ea7c609a51..32fd285dcdb 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp @@ -67,16 +67,18 @@ std::unique_ptr AzureObjectStorage::readObject( /// NOLINT } std::unique_ptr AzureObjectStorage::readObjects( /// NOLINT - const std::string & common_path_prefix, - const BlobsPathToSize & blobs_to_read, + const PathsWithSize & paths_to_read, const ReadSettings & read_settings, std::optional, std::optional) const { auto settings_ptr = settings.get(); auto reader_impl = std::make_unique( - client.get(), common_path_prefix, blobs_to_read, - settings_ptr->max_single_read_retries, settings_ptr->max_single_download_retries, read_settings); + client.get(), + paths_to_read, + settings_ptr->max_single_read_retries, + settings_ptr->max_single_download_retries, + read_settings); if (read_settings.remote_fs_method == RemoteFSReadMethod::threadpool) { @@ -111,7 +113,7 @@ std::unique_ptr AzureObjectStorage::writeObject( /// NO return std::make_unique(std::move(buffer), std::move(finalize_callback), path); } -void AzureObjectStorage::listPrefix(const std::string & path, BlobsPathToSize & children) const +void AzureObjectStorage::listPrefix(const std::string & path, PathsWithSize & children) const { auto client_ptr = client.get(); @@ -134,10 +136,10 @@ void AzureObjectStorage::removeObject(const std::string & path) throw Exception(ErrorCodes::AZURE_BLOB_STORAGE_ERROR, "Failed to delete file in AzureBlob Storage: {}", path); } -void AzureObjectStorage::removeObjects(const std::vector & 
paths) +void AzureObjectStorage::removeObjects(const PathsWithSize & paths) { auto client_ptr = client.get(); - for (const auto & path : paths) + for (const auto & [path, _] : paths) { auto delete_info = client_ptr->DeleteBlob(path); if (!delete_info.Value.Deleted) @@ -151,10 +153,10 @@ void AzureObjectStorage::removeObjectIfExists(const std::string & path) auto delete_info = client_ptr->DeleteBlob(path); } -void AzureObjectStorage::removeObjectsIfExist(const std::vector & paths) +void AzureObjectStorage::removeObjectsIfExist(const PathsWithSize & paths) { auto client_ptr = client.get(); - for (const auto & path : paths) + for (const auto & [path, _] : paths) auto delete_info = client_ptr->DeleteBlob(path); } diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h index 37c3ba72ed9..559be0ad257 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h @@ -59,8 +59,7 @@ public: std::optional file_size = {}) const override; std::unique_ptr readObjects( /// NOLINT - const std::string & common_path_prefix, - const BlobsPathToSize & blobs_to_read, + const PathsWithSize & blobs_to_read, const ReadSettings & read_settings = ReadSettings{}, std::optional read_hint = {}, std::optional file_size = {}) const override; @@ -74,15 +73,16 @@ public: size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, const WriteSettings & write_settings = {}) override; - void listPrefix(const std::string & path, BlobsPathToSize & children) const override; + void listPrefix(const std::string & path, PathsWithSize & children) const override; + /// Remove file. Throws exception if file doesn't exists or it's a directory. 
void removeObject(const std::string & path) override; - void removeObjects(const std::vector & paths) override; + void removeObjects(const PathsWithSize & paths) override; void removeObjectIfExists(const std::string & path) override; - void removeObjectsIfExist(const std::vector & paths) override; + void removeObjectsIfExist(const PathsWithSize & paths) override; ObjectMetadata getObjectMetadata(const std::string & path) const override; @@ -95,11 +95,19 @@ public: void startup() override {} - void applyNewSettings(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, ContextPtr context) override; + void applyNewSettings( + const Poco::Util::AbstractConfiguration & config, + const std::string & config_prefix, + ContextPtr context) override; String getObjectsNamespace() const override { return ""; } - std::unique_ptr cloneObjectStorage(const std::string & new_namespace, const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, ContextPtr context) override; + std::unique_ptr cloneObjectStorage( + const std::string & new_namespace, + const Poco::Util::AbstractConfiguration & config, + const std::string & config_prefix, + ContextPtr context) override; + private: const String name; diff --git a/src/Disks/ObjectStorages/DiskObjectStorage.cpp b/src/Disks/ObjectStorages/DiskObjectStorage.cpp index 0f2c320ed67..540672d9b0a 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorage.cpp +++ b/src/Disks/ObjectStorages/DiskObjectStorage.cpp @@ -108,9 +108,9 @@ DiskObjectStorage::DiskObjectStorage( , metadata_helper(std::make_unique(this, ReadSettings{})) {} -std::vector DiskObjectStorage::getRemotePaths(const String & local_path) const +PathsWithSize DiskObjectStorage::getObjectStoragePaths(const String & local_path) const { - return metadata_storage->getRemotePaths(local_path); + return metadata_storage->getObjectStoragePaths(local_path); } void DiskObjectStorage::getRemotePathsRecursive(const String & local_path, 
std::vector & paths_map) @@ -120,7 +120,7 @@ void DiskObjectStorage::getRemotePathsRecursive(const String & local_path, std:: { try { - paths_map.emplace_back(local_path, getRemotePaths(local_path)); + paths_map.emplace_back(local_path, getObjectStoragePaths(local_path)); } catch (const Exception & e) { @@ -244,9 +244,9 @@ String DiskObjectStorage::getUniqueId(const String & path) const { LOG_TRACE(log, "Remote path: {}, Path: {}", remote_fs_root_path, path); String id; - auto blobs_paths = metadata_storage->getRemotePaths(path); + auto blobs_paths = metadata_storage->getObjectStoragePaths(path); if (!blobs_paths.empty()) - id = blobs_paths[0]; + id = blobs_paths[0].path; return id; } @@ -438,7 +438,11 @@ std::unique_ptr DiskObjectStorage::readFile( std::optional read_hint, std::optional file_size) const { - return object_storage->readObjects(remote_fs_root_path, metadata_storage->getBlobs(path), settings, read_hint, file_size); + return object_storage->readObjects( + metadata_storage->getObjectStoragePaths(path), + settings, + read_hint, + file_size); } std::unique_ptr DiskObjectStorage::writeFile( diff --git a/src/Disks/ObjectStorages/DiskObjectStorage.h b/src/Disks/ObjectStorages/DiskObjectStorage.h index b1a1d263ede..ef29dc8f071 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorage.h +++ b/src/Disks/ObjectStorages/DiskObjectStorage.h @@ -49,7 +49,7 @@ public: const String & getPath() const override { return metadata_storage->getPath(); } - std::vector getRemotePaths(const String & local_path) const override; + PathsWithSize getObjectStoragePaths(const String & local_path) const override; void getRemotePathsRecursive(const String & local_path, std::vector & paths_map) override; diff --git a/src/Disks/ObjectStorages/DiskObjectStorageMetadata.cpp b/src/Disks/ObjectStorages/DiskObjectStorageMetadata.cpp index 6ed049b865d..6763e37ed69 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorageMetadata.cpp +++ b/src/Disks/ObjectStorages/DiskObjectStorageMetadata.cpp @@ 
-26,14 +26,14 @@ void DiskObjectStorageMetadata::deserialize(ReadBuffer & buf) assertChar('\n', buf); - UInt32 remote_fs_objects_count; - readIntText(remote_fs_objects_count, buf); + UInt32 storage_objects_count; + readIntText(storage_objects_count, buf); assertChar('\t', buf); readIntText(total_size, buf); assertChar('\n', buf); - remote_fs_objects.resize(remote_fs_objects_count); + storage_objects.resize(storage_objects_count); - for (size_t i = 0; i < remote_fs_objects_count; ++i) + for (size_t i = 0; i < storage_objects_count; ++i) { String remote_fs_object_path; size_t remote_fs_object_size; @@ -50,8 +50,8 @@ void DiskObjectStorageMetadata::deserialize(ReadBuffer & buf) remote_fs_object_path = remote_fs_object_path.substr(remote_fs_root_path.size()); } assertChar('\n', buf); - remote_fs_objects[i].relative_path = remote_fs_object_path; - remote_fs_objects[i].bytes_size = remote_fs_object_size; + storage_objects[i].path = remote_fs_object_path; + storage_objects[i].size = remote_fs_object_size; } readIntText(ref_count, buf); @@ -75,12 +75,12 @@ void DiskObjectStorageMetadata::serialize(WriteBuffer & buf, bool sync) const writeIntText(VERSION_READ_ONLY_FLAG, buf); writeChar('\n', buf); - writeIntText(remote_fs_objects.size(), buf); + writeIntText(storage_objects.size(), buf); writeChar('\t', buf); writeIntText(total_size, buf); writeChar('\n', buf); - for (const auto & [remote_fs_object_path, remote_fs_object_size] : remote_fs_objects) + for (const auto & [remote_fs_object_path, remote_fs_object_size] : storage_objects) { writeIntText(remote_fs_object_size, buf); writeChar('\t', buf); @@ -120,7 +120,7 @@ DiskObjectStorageMetadata::DiskObjectStorageMetadata( void DiskObjectStorageMetadata::addObject(const String & path, size_t size) { total_size += size; - remote_fs_objects.emplace_back(path, size); + storage_objects.emplace_back(path, size); } diff --git a/src/Disks/ObjectStorages/DiskObjectStorageMetadata.h b/src/Disks/ObjectStorages/DiskObjectStorageMetadata.h 
index 27df9e57cf4..14dd819b4f8 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorageMetadata.h +++ b/src/Disks/ObjectStorages/DiskObjectStorageMetadata.h @@ -19,8 +19,8 @@ private: const std::string & common_metadata_path; - /// Remote FS objects paths and their sizes. - std::vector remote_fs_objects; + /// Relative paths of blobs. + std::vector storage_objects; /// URI const std::string & remote_fs_root_path; @@ -60,9 +60,9 @@ public: return remote_fs_root_path; } - std::vector getBlobs() const + std::vector getBlobs() const { - return remote_fs_objects; + return storage_objects; } bool isReadOnly() const diff --git a/src/Disks/ObjectStorages/DiskObjectStorageRemoteMetadataRestoreHelper.cpp b/src/Disks/ObjectStorages/DiskObjectStorageRemoteMetadataRestoreHelper.cpp index 96667b8496a..f820d06646a 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorageRemoteMetadataRestoreHelper.cpp +++ b/src/Disks/ObjectStorages/DiskObjectStorageRemoteMetadataRestoreHelper.cpp @@ -84,13 +84,13 @@ void DiskObjectStorageRemoteMetadataRestoreHelper::migrateFileToRestorableSchema { LOG_TRACE(disk->log, "Migrate file {} to restorable schema", disk->metadata_storage->getPath() + path); - auto blobs = disk->metadata_storage->getBlobs(path); - for (const auto & [key, _] : blobs) + auto objects = disk->metadata_storage->getObjectStoragePaths(path); + for (const auto & [object_path, size] : objects) { ObjectAttributes metadata { {"path", path} }; - updateObjectMetadata(disk->remote_fs_root_path + key, metadata); + updateObjectMetadata(object_path, metadata); } } void DiskObjectStorageRemoteMetadataRestoreHelper::migrateToRestorableSchemaRecursive(const String & path, Futures & results) @@ -346,7 +346,7 @@ void DiskObjectStorageRemoteMetadataRestoreHelper::restoreFiles(IObjectStorage * LOG_INFO(disk->log, "Starting restore files for disk {}", disk->name); std::vector> results; - auto restore_files = [this, &source_object_storage, &restore_information, &results](const BlobsPathToSize & keys) 
+ auto restore_files = [this, &source_object_storage, &restore_information, &results](const PathsWithSize & keys) { std::vector keys_names; for (const auto & [key, size] : keys) @@ -379,7 +379,7 @@ void DiskObjectStorageRemoteMetadataRestoreHelper::restoreFiles(IObjectStorage * return true; }; - BlobsPathToSize children; + PathsWithSize children; source_object_storage->listPrefix(restore_information.source_path, children); restore_files(children); @@ -456,7 +456,7 @@ void DiskObjectStorageRemoteMetadataRestoreHelper::restoreFileOperations(IObject bool send_metadata = source_object_storage->getObjectsNamespace() != disk->object_storage->getObjectsNamespace() || disk->remote_fs_root_path != restore_information.source_path; std::set renames; - auto restore_file_operations = [this, &source_object_storage, &restore_information, &renames, &send_metadata](const BlobsPathToSize & keys) + auto restore_file_operations = [this, &source_object_storage, &restore_information, &renames, &send_metadata](const PathsWithSize & keys) { const String rename = "rename"; const String hardlink = "hardlink"; @@ -523,7 +523,7 @@ void DiskObjectStorageRemoteMetadataRestoreHelper::restoreFileOperations(IObject return true; }; - BlobsPathToSize children; + PathsWithSize children; source_object_storage->listPrefix(restore_information.source_path + "operations/", children); restore_file_operations(children); diff --git a/src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp b/src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp index cd43ea81be0..1d16012437a 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp +++ b/src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp @@ -65,7 +65,7 @@ struct RemoveObjectStorageOperation final : public IDiskObjectStorageOperation std::string path; bool delete_metadata_only; bool remove_from_cache{false}; - std::vector paths_to_remove; + PathsWithSize paths_to_remove; bool if_exists; RemoveObjectStorageOperation( @@ -96,13 +96,13 
@@ struct RemoveObjectStorageOperation final : public IDiskObjectStorageOperation try { uint32_t hardlink_count = metadata_storage.getHardlinkCount(path); - auto remote_objects = metadata_storage.getRemotePaths(path); + auto objects = metadata_storage.getObjectStoragePaths(path); tx->unlinkMetadata(path); if (hardlink_count == 0) { - paths_to_remove = remote_objects; + paths_to_remove = objects; remove_from_cache = true; } } @@ -134,7 +134,7 @@ struct RemoveObjectStorageOperation final : public IDiskObjectStorageOperation if (remove_from_cache) { for (const auto & path_to_remove : paths_to_remove) - object_storage.removeFromCache(path_to_remove); + object_storage.removeFromCache(path_to_remove.path); } } @@ -143,10 +143,10 @@ struct RemoveObjectStorageOperation final : public IDiskObjectStorageOperation struct RemoveRecursiveObjectStorageOperation final : public IDiskObjectStorageOperation { std::string path; - std::unordered_map> paths_to_remove; + std::unordered_map paths_to_remove; bool keep_all_batch_data; NameSet file_names_remove_metadata_only; - std::vector path_to_remove_from_cache; + PathsWithSize path_to_remove_from_cache; RemoveRecursiveObjectStorageOperation( IObjectStorage & object_storage_, @@ -169,14 +169,14 @@ struct RemoveRecursiveObjectStorageOperation final : public IDiskObjectStorageOp try { uint32_t hardlink_count = metadata_storage.getHardlinkCount(path_to_remove); - auto remote_objects = metadata_storage.getRemotePaths(path_to_remove); + auto objects_paths = metadata_storage.getObjectStoragePaths(path_to_remove); tx->unlinkMetadata(path_to_remove); if (hardlink_count == 0) { - paths_to_remove[path_to_remove] = remote_objects; - path_to_remove_from_cache.insert(path_to_remove_from_cache.end(), remote_objects.begin(), remote_objects.end()); + paths_to_remove[path_to_remove] = objects_paths; + path_to_remove_from_cache.insert(path_to_remove_from_cache.end(), objects_paths.begin(), objects_paths.end()); } } @@ -217,7 +217,7 @@ struct 
RemoveRecursiveObjectStorageOperation final : public IDiskObjectStorageOp { if (!keep_all_batch_data) { - std::vector remove_from_remote; + PathsWithSize remove_from_remote; for (auto && [local_path, remote_paths] : paths_to_remove) { if (!file_names_remove_metadata_only.contains(fs::path(local_path).filename())) @@ -228,7 +228,7 @@ struct RemoveRecursiveObjectStorageOperation final : public IDiskObjectStorageOp object_storage.removeObjects(remove_from_remote); } - for (const auto & path_to_remove : path_to_remove_from_cache) + for (const auto & [path_to_remove, _] : path_to_remove_from_cache) object_storage.removeFromCache(path_to_remove); } }; @@ -238,7 +238,7 @@ struct ReplaceFileObjectStorageOperation final : public IDiskObjectStorageOperat { std::string path_from; std::string path_to; - std::vector blobs_to_remove; + PathsWithSize blobs_to_remove; ReplaceFileObjectStorageOperation( IObjectStorage & object_storage_, @@ -254,7 +254,7 @@ struct ReplaceFileObjectStorageOperation final : public IDiskObjectStorageOperat { if (metadata_storage.exists(path_to)) { - blobs_to_remove = metadata_storage.getRemotePaths(path_to); + blobs_to_remove = metadata_storage.getObjectStoragePaths(path_to); tx->replaceFile(path_from, path_to); } else @@ -328,14 +328,15 @@ struct CopyFileObjectStorageOperation final : public IDiskObjectStorageOperation void execute(MetadataTransactionPtr tx) override { tx->createEmptyMetadataFile(to_path); - auto source_blobs = metadata_storage.getBlobs(from_path); + auto source_blobs = metadata_storage.getObjectStoragePaths(from_path); /// Full paths + for (const auto & [blob_from, size] : source_blobs) { auto blob_name = getRandomASCIIString(); auto blob_to = fs::path(remote_fs_root_path) / blob_name; - object_storage.copyObject(fs::path(remote_fs_root_path) / blob_from, blob_to); + object_storage.copyObject(blob_from, blob_to); tx->addBlobToMetadata(to_path, blob_name, size); diff --git a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp 
b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp index 4574b8cb52c..024d1e84ef0 100644 --- a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp +++ b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp @@ -49,13 +49,12 @@ std::unique_ptr HDFSObjectStorage::readObject( /// NOLINT } std::unique_ptr HDFSObjectStorage::readObjects( /// NOLINT - const std::string & common_path_prefix, - const BlobsPathToSize & blobs_to_read, + const PathsWithSize & paths_to_read, const ReadSettings & read_settings, std::optional, std::optional) const { - auto hdfs_impl = std::make_unique(config, common_path_prefix, common_path_prefix, blobs_to_read, read_settings); + auto hdfs_impl = std::make_unique(config, hdfs_root_path, paths_to_read, read_settings); auto buf = std::make_unique(std::move(hdfs_impl)); return std::make_unique(std::move(buf), settings->min_bytes_for_seek); } @@ -69,7 +68,9 @@ std::unique_ptr HDFSObjectStorage::writeObject( /// NOL const WriteSettings &) { if (attributes.has_value()) - throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "HDFS API doesn't support custom attributes/metadata for stored objects"); + throw Exception( + ErrorCodes::UNSUPPORTED_METHOD, + "HDFS API doesn't support custom attributes/metadata for stored objects"); /// Single O_WRONLY in libhdfs adds O_TRUNC auto hdfs_buffer = std::make_unique( @@ -80,7 +81,7 @@ std::unique_ptr HDFSObjectStorage::writeObject( /// NOL } -void HDFSObjectStorage::listPrefix(const std::string & path, BlobsPathToSize & children) const +void HDFSObjectStorage::listPrefix(const std::string & path, PathsWithSize & children) const { const size_t begin_of_path = path.find('/', path.find("//") + 2); int32_t num_entries; @@ -104,10 +105,10 @@ void HDFSObjectStorage::removeObject(const std::string & path) } -void HDFSObjectStorage::removeObjects(const std::vector & paths) +void HDFSObjectStorage::removeObjects(const PathsWithSize & paths) { - for (const auto & hdfs_path : paths) - removeObject(hdfs_path); + for (const auto & 
[path, _] : paths) + removeObject(path); } void HDFSObjectStorage::removeObjectIfExists(const std::string & path) @@ -116,15 +117,17 @@ void HDFSObjectStorage::removeObjectIfExists(const std::string & path) removeObject(path); } -void HDFSObjectStorage::removeObjectsIfExist(const std::vector & paths) +void HDFSObjectStorage::removeObjectsIfExist(const PathsWithSize & paths) { - for (const auto & hdfs_path : paths) - removeObjectIfExists(hdfs_path); + for (const auto & [path, _] : paths) + removeObjectIfExists(path); } ObjectMetadata HDFSObjectStorage::getObjectMetadata(const std::string &) const { - throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "HDFS API doesn't support custom attributes/metadata for stored objects"); + throw Exception( + ErrorCodes::UNSUPPORTED_METHOD, + "HDFS API doesn't support custom attributes/metadata for stored objects"); } void HDFSObjectStorage::copyObject( /// NOLINT @@ -133,7 +136,9 @@ void HDFSObjectStorage::copyObject( /// NOLINT std::optional object_to_attributes) { if (object_to_attributes.has_value()) - throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "HDFS API doesn't support custom attributes/metadata for stored objects"); + throw Exception( + ErrorCodes::UNSUPPORTED_METHOD, + "HDFS API doesn't support custom attributes/metadata for stored objects"); auto in = readObject(object_from); auto out = writeObject(object_to, WriteMode::Rewrite); diff --git a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.h b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.h index a9a223a3d7e..221a77ff08b 100644 --- a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.h +++ b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.h @@ -50,6 +50,7 @@ public: , hdfs_builder(createHDFSBuilder(hdfs_root_path_, config)) , hdfs_fs(createHDFSFS(hdfs_builder.get())) , settings(std::move(settings_)) + , hdfs_root_path(hdfs_root_path_) {} bool exists(const std::string & hdfs_uri) const override; @@ -61,8 +62,7 @@ public: std::optional file_size = {}) const override; 
std::unique_ptr readObjects( /// NOLINT - const std::string & common_path_prefix, - const BlobsPathToSize & blobs_to_read, + const PathsWithSize & paths_to_read, const ReadSettings & read_settings = ReadSettings{}, std::optional read_hint = {}, std::optional file_size = {}) const override; @@ -76,15 +76,16 @@ public: size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, const WriteSettings & write_settings = {}) override; - void listPrefix(const std::string & path, BlobsPathToSize & children) const override; + void listPrefix(const std::string & path, PathsWithSize & children) const override; + /// Remove file. Throws exception if file doesn't exists or it's a directory. void removeObject(const std::string & path) override; - void removeObjects(const std::vector & paths) override; + void removeObjects(const PathsWithSize & paths) override; void removeObjectIfExists(const std::string & path) override; - void removeObjectsIfExist(const std::vector & paths) override; + void removeObjectsIfExist(const PathsWithSize & paths) override; ObjectMetadata getObjectMetadata(const std::string & path) const override; @@ -97,11 +98,18 @@ public: void startup() override; - void applyNewSettings(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, ContextPtr context) override; + void applyNewSettings( + const Poco::Util::AbstractConfiguration & config, + const std::string & config_prefix, + ContextPtr context) override; String getObjectsNamespace() const override { return ""; } - std::unique_ptr cloneObjectStorage(const std::string & new_namespace, const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, ContextPtr context) override; + std::unique_ptr cloneObjectStorage( + const std::string & new_namespace, + const Poco::Util::AbstractConfiguration & config, + const std::string & config_prefix, + ContextPtr context) override; private: const Poco::Util::AbstractConfiguration & config; @@ -110,8 +118,7 @@ private: HDFSFSPtr hdfs_fs; 
SettingsPtr settings; - - + String hdfs_root_path; }; } diff --git a/src/Disks/ObjectStorages/IMetadataStorage.h b/src/Disks/ObjectStorages/IMetadataStorage.h index fbf611df1ac..49f4d4bcc51 100644 --- a/src/Disks/ObjectStorages/IMetadataStorage.h +++ b/src/Disks/ObjectStorages/IMetadataStorage.h @@ -119,11 +119,9 @@ public: /// Read multiple metadata files into strings and return mapping from file_path -> metadata virtual std::unordered_map getSerializedMetadata(const std::vector & file_paths) const = 0; - /// Return list of paths corresponding to metadata stored in local path - virtual std::vector getRemotePaths(const std::string & path) const = 0; - - /// Return [(remote_path, size_in_bytes), ...] for metadata path - virtual BlobsPathToSize getBlobs(const std::string & path) const = 0; + /// Return [(object_storage_path, size_in_bytes), ...] for metadata path + /// object_storage_path is a full path to the blob. + virtual PathsWithSize getObjectStoragePaths(const std::string & path) const = 0; }; using MetadataStoragePtr = std::shared_ptr; diff --git a/src/Disks/ObjectStorages/IObjectStorage.h b/src/Disks/ObjectStorages/IObjectStorage.h index 4921059c6b7..864b9ae8caa 100644 --- a/src/Disks/ObjectStorages/IObjectStorage.h +++ b/src/Disks/ObjectStorages/IObjectStorage.h @@ -25,23 +25,23 @@ class WriteBufferFromFileBase; using ObjectAttributes = std::map; -/// Path to blob with it's size -struct BlobPathWithSize +/// Path to a file and its size. +/// Path can be either relative or absolute - according to the context of use. 
+struct PathWithSize { - std::string relative_path; - uint64_t bytes_size; + std::string path; + uint64_t size; /// Size in bytes - BlobPathWithSize() = default; - BlobPathWithSize(const BlobPathWithSize & other) = default; + PathWithSize() = default; - BlobPathWithSize(const std::string & relative_path_, uint64_t bytes_size_) - : relative_path(relative_path_) - , bytes_size(bytes_size_) + PathWithSize(const std::string & path_, uint64_t size_) + : path(path_) + , size(size_) {} }; -/// List of blobs with their sizes -using BlobsPathToSize = std::vector; +/// List of paths with their sizes +using PathsWithSize = std::vector; struct ObjectMetadata { @@ -65,8 +65,8 @@ public: /// Path exists or not virtual bool exists(const std::string & path) const = 0; - /// List on prefix, return children with their sizes. - virtual void listPrefix(const std::string & path, BlobsPathToSize & children) const = 0; + /// List on prefix, return children (relative paths) with their sizes. + virtual void listPrefix(const std::string & path, PathsWithSize & children) const = 0; /// Get object metadata if supported. It should be possible to receive /// at least size of object @@ -81,8 +81,7 @@ public: /// Read multiple objects with common prefix virtual std::unique_ptr readObjects( /// NOLINT - const std::string & common_path_prefix, - const BlobsPathToSize & blobs_to_read, + const PathsWithSize & paths_to_read, const ReadSettings & read_settings = ReadSettings{}, std::optional read_hint = {}, std::optional file_size = {}) const = 0; @@ -101,13 +100,13 @@ public: /// Remove multiple objects. Some object storages can do batch remove in a more /// optimal way. 
- virtual void removeObjects(const std::vector & paths) = 0; + virtual void removeObjects(const PathsWithSize & paths) = 0; /// Remove object on path if exists virtual void removeObjectIfExists(const std::string & path) = 0; /// Remove objects on path if exists - virtual void removeObjectsIfExist(const std::vector & paths) = 0; + virtual void removeObjectsIfExist(const PathsWithSize & paths) = 0; /// Copy object with different attributes if required virtual void copyObject( /// NOLINT @@ -140,7 +139,10 @@ public: void removeFromCache(const std::string & path); /// Apply new settings, in most cases reiniatilize client and some other staff - virtual void applyNewSettings(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, ContextPtr context) = 0; + virtual void applyNewSettings( + const Poco::Util::AbstractConfiguration & config, + const std::string & config_prefix, + ContextPtr context) = 0; /// Sometimes object storages have something similar to chroot or namespace, for example /// buckets in S3. If object storage doesn't have any namepaces return empty string. @@ -148,7 +150,10 @@ public: /// FIXME: confusing function required for a very specific case. Create new instance of object storage /// in different namespace. 
- virtual std::unique_ptr cloneObjectStorage(const std::string & new_namespace, const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, ContextPtr context) = 0; + virtual std::unique_ptr cloneObjectStorage( + const std::string & new_namespace, + const Poco::Util::AbstractConfiguration & config, + const std::string & config_prefix, ContextPtr context) = 0; protected: FileCachePtr cache; diff --git a/src/Disks/ObjectStorages/MetadataStorageFromDisk.cpp b/src/Disks/ObjectStorages/MetadataStorageFromDisk.cpp index ae87e8c61c0..393603ff11a 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromDisk.cpp +++ b/src/Disks/ObjectStorages/MetadataStorageFromDisk.cpp @@ -300,18 +300,18 @@ MetadataTransactionPtr MetadataStorageFromDisk::createTransaction() const return std::make_shared(*this); } -std::vector MetadataStorageFromDisk::getRemotePaths(const std::string & path) const +PathsWithSize MetadataStorageFromDisk::getObjectStoragePaths(const std::string & path) const { auto metadata = readMetadata(path); - std::vector remote_paths; - auto blobs = metadata->getBlobs(); + PathsWithSize object_storage_paths = metadata->getBlobs(); /// Relative paths. auto root_path = metadata->getBlobsCommonPrefix(); - remote_paths.reserve(blobs.size()); - for (const auto & [remote_path, _] : blobs) - remote_paths.push_back(fs::path(root_path) / remote_path); - return remote_paths; + /// Relative paths -> absolute. 
+ for (auto & [object_path, _] : object_storage_paths) + object_path = fs::path(root_path) / object_path; + + return object_storage_paths; } uint32_t MetadataStorageFromDisk::getHardlinkCount(const std::string & path) const @@ -320,12 +320,6 @@ uint32_t MetadataStorageFromDisk::getHardlinkCount(const std::string & path) con return metadata->getRefCount(); } -BlobsPathToSize MetadataStorageFromDisk::getBlobs(const std::string & path) const -{ - auto metadata = readMetadata(path); - return metadata->getBlobs(); -} - void MetadataStorageFromDiskTransaction::unlinkMetadata(const std::string & path) { auto metadata = metadata_storage.readMetadata(path); diff --git a/src/Disks/ObjectStorages/MetadataStorageFromDisk.h b/src/Disks/ObjectStorages/MetadataStorageFromDisk.h index 1ac68e193f2..8dd47d18fdb 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromDisk.h +++ b/src/Disks/ObjectStorages/MetadataStorageFromDisk.h @@ -59,9 +59,7 @@ public: std::unordered_map getSerializedMetadata(const std::vector & file_paths) const override; - BlobsPathToSize getBlobs(const std::string & path) const override; - - std::vector getRemotePaths(const std::string & path) const override; + PathsWithSize getObjectStoragePaths(const std::string & path) const override; uint32_t getHardlinkCount(const std::string & path) const override; diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp index ffe4d2dd942..62995e04e9b 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp @@ -109,8 +109,7 @@ bool S3ObjectStorage::exists(const std::string & path) const std::unique_ptr S3ObjectStorage::readObjects( /// NOLINT - const std::string & common_path_prefix, - const BlobsPathToSize & blobs_to_read, + const PathsWithSize & paths_to_read, const ReadSettings & read_settings, std::optional, std::optional) const @@ -128,8 +127,12 @@ std::unique_ptr S3ObjectStorage::readObjects( /// NOLINT 
auto settings_ptr = s3_settings.get(); auto s3_impl = std::make_unique( - client.get(), bucket, version_id, common_path_prefix, blobs_to_read, - settings_ptr->s3_settings.max_single_read_retries, disk_read_settings); + client.get(), + bucket, + version_id, + paths_to_read, + settings_ptr->s3_settings.max_single_read_retries, + read_settings); if (read_settings.remote_fs_method == RemoteFSReadMethod::threadpool) { @@ -192,7 +195,7 @@ std::unique_ptr S3ObjectStorage::writeObject( /// NOLIN return std::make_unique(std::move(s3_buffer), std::move(finalize_callback), path); } -void S3ObjectStorage::listPrefix(const std::string & path, BlobsPathToSize & children) const +void S3ObjectStorage::listPrefix(const std::string & path, PathsWithSize & children) const { auto settings_ptr = s3_settings.get(); auto client_ptr = client.get(); @@ -253,14 +256,14 @@ void S3ObjectStorage::removeObjectImpl(const std::string & path, bool if_exists) } } -void S3ObjectStorage::removeObjectsImpl(const std::vector & paths, bool if_exists) +void S3ObjectStorage::removeObjectsImpl(const PathsWithSize & paths, bool if_exists) { if (paths.empty()) return; if (!s3_capabilities.support_batch_delete) { - for (const auto & path : paths) + for (const auto & [path, _] : paths) removeObjectImpl(path, if_exists); } else @@ -278,12 +281,12 @@ void S3ObjectStorage::removeObjectsImpl(const std::vector & paths, for (; current_position < paths.size() && current_chunk.size() < chunk_size_limit; ++current_position) { Aws::S3::Model::ObjectIdentifier obj; - obj.SetKey(paths[current_position]); + obj.SetKey(paths[current_position].path); current_chunk.push_back(obj); if (!keys.empty()) keys += ", "; - keys += paths[current_position]; + keys += paths[current_position].path; } Aws::S3::Model::Delete delkeys; @@ -308,12 +311,12 @@ void S3ObjectStorage::removeObjectIfExists(const std::string & path) removeObjectImpl(path, true); } -void S3ObjectStorage::removeObjects(const std::vector & paths) +void 
S3ObjectStorage::removeObjects(const PathsWithSize & paths) { removeObjectsImpl(paths, false); } -void S3ObjectStorage::removeObjectsIfExist(const std::vector & paths) +void S3ObjectStorage::removeObjectsIfExist(const PathsWithSize & paths) { removeObjectsImpl(paths, true); } diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.h b/src/Disks/ObjectStorages/S3/S3ObjectStorage.h index 16e6c9c8cd5..5c53ea1f894 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.h +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.h @@ -66,8 +66,7 @@ public: std::optional file_size = {}) const override; std::unique_ptr readObjects( /// NOLINT - const std::string & common_path_prefix, - const BlobsPathToSize & blobs_to_read, + const PathsWithSize & paths_to_read, const ReadSettings & read_settings = ReadSettings{}, std::optional read_hint = {}, std::optional file_size = {}) const override; @@ -81,15 +80,16 @@ public: size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, const WriteSettings & write_settings = {}) override; - void listPrefix(const std::string & path, BlobsPathToSize & children) const override; + void listPrefix(const std::string & path, PathsWithSize & children) const override; + /// Remove file. Throws exception if file doesn't exist or it's a directory. 
void removeObject(const std::string & path) override; - void removeObjects(const std::vector & paths) override; + void removeObjects(const PathsWithSize & paths) override; void removeObjectIfExists(const std::string & path) override; - void removeObjectsIfExist(const std::vector & paths) override; + void removeObjectsIfExist(const PathsWithSize & paths) override; ObjectMetadata getObjectMetadata(const std::string & path) const override; @@ -108,26 +108,42 @@ public: void startup() override; - void applyNewSettings(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, ContextPtr context) override; + void applyNewSettings( + const Poco::Util::AbstractConfiguration & config, + const std::string & config_prefix, + ContextPtr context) override; String getObjectsNamespace() const override { return bucket; } - std::unique_ptr cloneObjectStorage(const std::string & new_namespace, const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, ContextPtr context) override; + std::unique_ptr cloneObjectStorage( + const std::string & new_namespace, + const Poco::Util::AbstractConfiguration & config, + const std::string & config_prefix, + ContextPtr context) override; + private: void setNewSettings(std::unique_ptr && s3_settings_); void setNewClient(std::unique_ptr && client_); - void copyObjectImpl(const String & src_bucket, const String & src_key, const String & dst_bucket, const String & dst_key, + void copyObjectImpl( + const String & src_bucket, + const String & src_key, + const String & dst_bucket, + const String & dst_key, std::optional head = std::nullopt, std::optional metadata = std::nullopt) const; - void copyObjectMultipartImpl(const String & src_bucket, const String & src_key, const String & dst_bucket, const String & dst_key, + void copyObjectMultipartImpl( + const String & src_bucket, + const String & src_key, + const String & dst_bucket, + const String & dst_key, std::optional head = std::nullopt, 
std::optional metadata = std::nullopt) const; void removeObjectImpl(const std::string & path, bool if_exists); - void removeObjectsImpl(const std::vector & paths, bool if_exists); + void removeObjectsImpl(const PathsWithSize & paths, bool if_exists); Aws::S3::Model::HeadObjectOutcome requestObjectHeadData(const std::string & bucket_from, const std::string & key) const; diff --git a/src/Storages/System/StorageSystemRemoteDataPaths.cpp b/src/Storages/System/StorageSystemRemoteDataPaths.cpp index a009f9d25c9..d39a0d2482d 100644 --- a/src/Storages/System/StorageSystemRemoteDataPaths.cpp +++ b/src/Storages/System/StorageSystemRemoteDataPaths.cpp @@ -68,11 +68,11 @@ Pipe StorageSystemRemoteDataPaths::read( col_base_path->insert(disk->getPath()); col_cache_base_path->insert(cache_base_path); col_local_path->insert(local_path); - col_remote_path->insert(remote_path); + col_remote_path->insert(remote_path.path); if (cache) { - auto cache_paths = cache->tryGetCachePaths(cache->hash(remote_path)); + auto cache_paths = cache->tryGetCachePaths(cache->hash(remote_path.path)); col_cache_paths->insert(Array(cache_paths.begin(), cache_paths.end())); } else From 8da6136f882866c3dafbde05bed6a2333b276870 Mon Sep 17 00:00:00 2001 From: kssenii Date: Sun, 26 Jun 2022 13:21:05 +0200 Subject: [PATCH 097/101] Fix --- src/Disks/IO/ReadBufferFromRemoteFSGather.cpp | 2 +- src/Disks/IO/ReadBufferFromRemoteFSGather.h | 3 +++ src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp | 2 +- 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp b/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp index 804fb69a8f5..e8197eba008 100644 --- a/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp +++ b/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp @@ -95,7 +95,7 @@ SeekableReadBufferPtr ReadBufferFromWebServerGather::createImplementationBufferI { current_file_path = path; return std::make_unique( - path, + fs::path(uri) / path, context, settings, /* use_external_buffer 
*/true, diff --git a/src/Disks/IO/ReadBufferFromRemoteFSGather.h b/src/Disks/IO/ReadBufferFromRemoteFSGather.h index 8edacfd36c9..1c4cdc678f4 100644 --- a/src/Disks/IO/ReadBufferFromRemoteFSGather.h +++ b/src/Disks/IO/ReadBufferFromRemoteFSGather.h @@ -195,6 +195,8 @@ public: , config(config_) , hdfs_uri(hdfs_uri_) { + const size_t begin_of_path = hdfs_uri_.find('/', hdfs_uri_.find("//") + 2); + hdfs_uri = hdfs_uri_.substr(0, begin_of_path); } SeekableReadBufferPtr createImplementationBufferImpl(const String & path, size_t file_size) override; @@ -203,6 +205,7 @@ private: const Poco::Util::AbstractConfiguration & config; String hdfs_uri; }; + #endif } diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp index 62995e04e9b..58bd29d2d73 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp @@ -132,7 +132,7 @@ std::unique_ptr S3ObjectStorage::readObjects( /// NOLINT version_id, paths_to_read, settings_ptr->s3_settings.max_single_read_retries, - read_settings); + disk_read_settings); if (read_settings.remote_fs_method == RemoteFSReadMethod::threadpool) { From a15238d147617d1697e0922e79bdc85b49951f69 Mon Sep 17 00:00:00 2001 From: kssenii Date: Sun, 26 Jun 2022 16:44:55 +0200 Subject: [PATCH 098/101] Review fixes, fix hdfs tests --- src/Disks/IO/ReadBufferFromRemoteFSGather.cpp | 9 +++++++-- src/Disks/IO/ReadBufferFromRemoteFSGather.h | 10 +++------- .../ObjectStorages/DiskObjectStorageMetadata.cpp | 8 ++++++-- .../ObjectStorages/DiskObjectStorageMetadata.h | 15 +++++++++++++-- ...skObjectStorageRemoteMetadataRestoreHelper.cpp | 2 +- src/Disks/ObjectStorages/IObjectStorage.h | 6 +++--- .../ObjectStorages/MetadataStorageFromDisk.cpp | 11 +++++++---- 7 files changed, 40 insertions(+), 21 deletions(-) diff --git a/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp b/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp index e8197eba008..b53ba0f8e29 100644 --- 
a/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp +++ b/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp @@ -106,7 +106,12 @@ SeekableReadBufferPtr ReadBufferFromWebServerGather::createImplementationBufferI #if USE_HDFS SeekableReadBufferPtr ReadBufferFromHDFSGather::createImplementationBufferImpl(const String & path, size_t /* file_size */) { - return std::make_unique(hdfs_uri, path, config); + size_t begin_of_path = path.find('/', path.find("//") + 2); + auto hdfs_path = path.substr(begin_of_path); + auto hdfs_uri = path.substr(0, begin_of_path); + LOG_TEST(log, "HDFS uri: {}, path: {}", hdfs_path, hdfs_uri); + + return std::make_unique(hdfs_uri, hdfs_path, config); } #endif @@ -310,7 +315,7 @@ size_t ReadBufferFromRemoteFSGather::getFileSize() const { size_t size = 0; for (const auto & object : blobs_to_read) - size += object.size; + size += object.bytes_size; return size; } diff --git a/src/Disks/IO/ReadBufferFromRemoteFSGather.h b/src/Disks/IO/ReadBufferFromRemoteFSGather.h index 1c4cdc678f4..e282a2cfc6c 100644 --- a/src/Disks/IO/ReadBufferFromRemoteFSGather.h +++ b/src/Disks/IO/ReadBufferFromRemoteFSGather.h @@ -66,6 +66,8 @@ protected: String query_id; + Poco::Logger * log; + private: SeekableReadBufferPtr createImplementationBuffer(const String & path, size_t file_size); @@ -92,8 +94,6 @@ private: */ size_t bytes_to_ignore = 0; - Poco::Logger * log; - size_t total_bytes_read_from_current_file = 0; bool enable_cache_log = false; @@ -188,22 +188,18 @@ class ReadBufferFromHDFSGather final : public ReadBufferFromRemoteFSGather public: ReadBufferFromHDFSGather( const Poco::Util::AbstractConfiguration & config_, - const String & hdfs_uri_, + const String &, const PathsWithSize & blobs_to_read_, const ReadSettings & settings_) : ReadBufferFromRemoteFSGather(blobs_to_read_, settings_) , config(config_) - , hdfs_uri(hdfs_uri_) { - const size_t begin_of_path = hdfs_uri_.find('/', hdfs_uri_.find("//") + 2); - hdfs_uri = hdfs_uri_.substr(0, begin_of_path); } 
SeekableReadBufferPtr createImplementationBufferImpl(const String & path, size_t file_size) override; private: const Poco::Util::AbstractConfiguration & config; - String hdfs_uri; }; #endif diff --git a/src/Disks/ObjectStorages/DiskObjectStorageMetadata.cpp b/src/Disks/ObjectStorages/DiskObjectStorageMetadata.cpp index 6763e37ed69..9ac04809b97 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorageMetadata.cpp +++ b/src/Disks/ObjectStorages/DiskObjectStorageMetadata.cpp @@ -11,6 +11,7 @@ namespace DB namespace ErrorCodes { extern const int UNKNOWN_FORMAT; + extern const int LOGICAL_ERROR; } void DiskObjectStorageMetadata::deserialize(ReadBuffer & buf) @@ -50,8 +51,8 @@ void DiskObjectStorageMetadata::deserialize(ReadBuffer & buf) remote_fs_object_path = remote_fs_object_path.substr(remote_fs_root_path.size()); } assertChar('\n', buf); - storage_objects[i].path = remote_fs_object_path; - storage_objects[i].size = remote_fs_object_size; + storage_objects[i].relative_path = remote_fs_object_path; + storage_objects[i].bytes_size = remote_fs_object_size; } readIntText(ref_count, buf); @@ -119,6 +120,9 @@ DiskObjectStorageMetadata::DiskObjectStorageMetadata( void DiskObjectStorageMetadata::addObject(const String & path, size_t size) { + if (path.starts_with(remote_fs_root_path)) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected relative path"); + total_size += size; storage_objects.emplace_back(path, size); } diff --git a/src/Disks/ObjectStorages/DiskObjectStorageMetadata.h b/src/Disks/ObjectStorages/DiskObjectStorageMetadata.h index 14dd819b4f8..adebbcb952d 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorageMetadata.h +++ b/src/Disks/ObjectStorages/DiskObjectStorageMetadata.h @@ -12,6 +12,17 @@ namespace DB struct DiskObjectStorageMetadata { private: + struct RelativePathWithSize + { + String relative_path; + size_t bytes_size; + + RelativePathWithSize() = default; + + RelativePathWithSize(const String & relative_path_, size_t bytes_size_) + : 
relative_path(relative_path_), bytes_size(bytes_size_) {} + }; + /// Metadata file version. static constexpr uint32_t VERSION_ABSOLUTE_PATHS = 1; static constexpr uint32_t VERSION_RELATIVE_PATHS = 2; @@ -20,7 +31,7 @@ private: const std::string & common_metadata_path; /// Relative paths of blobs. - std::vector storage_objects; + std::vector storage_objects; /// URI const std::string & remote_fs_root_path; @@ -60,7 +71,7 @@ public: return remote_fs_root_path; } - std::vector getBlobs() const + std::vector getBlobsRelativePaths() const { return storage_objects; } diff --git a/src/Disks/ObjectStorages/DiskObjectStorageRemoteMetadataRestoreHelper.cpp b/src/Disks/ObjectStorages/DiskObjectStorageRemoteMetadataRestoreHelper.cpp index f820d06646a..a8140e8954e 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorageRemoteMetadataRestoreHelper.cpp +++ b/src/Disks/ObjectStorages/DiskObjectStorageRemoteMetadataRestoreHelper.cpp @@ -85,7 +85,7 @@ void DiskObjectStorageRemoteMetadataRestoreHelper::migrateFileToRestorableSchema LOG_TRACE(disk->log, "Migrate file {} to restorable schema", disk->metadata_storage->getPath() + path); auto objects = disk->metadata_storage->getObjectStoragePaths(path); - for (const auto & [object_path, size] : objects) + for (const auto & [object_path, _] : objects) { ObjectAttributes metadata { {"path", path} diff --git a/src/Disks/ObjectStorages/IObjectStorage.h b/src/Disks/ObjectStorages/IObjectStorage.h index 864b9ae8caa..64022ec046d 100644 --- a/src/Disks/ObjectStorages/IObjectStorage.h +++ b/src/Disks/ObjectStorages/IObjectStorage.h @@ -30,13 +30,13 @@ using ObjectAttributes = std::map; struct PathWithSize { std::string path; - uint64_t size; /// Size in bytes + uint64_t bytes_size; PathWithSize() = default; - PathWithSize(const std::string & path_, uint64_t size_) + PathWithSize(const std::string & path_, uint64_t bytes_size_) : path(path_) - , size(size_) + , bytes_size(bytes_size_) {} }; diff --git 
a/src/Disks/ObjectStorages/MetadataStorageFromDisk.cpp b/src/Disks/ObjectStorages/MetadataStorageFromDisk.cpp index 393603ff11a..32e6fe5834d 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromDisk.cpp +++ b/src/Disks/ObjectStorages/MetadataStorageFromDisk.cpp @@ -304,12 +304,15 @@ PathsWithSize MetadataStorageFromDisk::getObjectStoragePaths(const std::string & { auto metadata = readMetadata(path); - PathsWithSize object_storage_paths = metadata->getBlobs(); /// Relative paths. - auto root_path = metadata->getBlobsCommonPrefix(); + auto object_storage_relative_paths = metadata->getBlobsRelativePaths(); /// Relative paths. + fs::path root_path = metadata->getBlobsCommonPrefix(); + + PathsWithSize object_storage_paths; + object_storage_paths.reserve(object_storage_relative_paths.size()); /// Relative paths -> absolute. - for (auto & [object_path, _] : object_storage_paths) - object_path = fs::path(root_path) / object_path; + for (auto & [object_relative_path, size] : object_storage_relative_paths) + object_storage_paths.emplace_back(root_path / object_relative_path, size); return object_storage_paths; } From 5727671cacd72fe00b93d965e18054c479289cd5 Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 27 Jun 2022 00:34:57 +0200 Subject: [PATCH 099/101] Fix tests --- src/Disks/IO/ReadBufferFromRemoteFSGather.cpp | 12 ++++++++---- src/Disks/IO/ReadBufferFromRemoteFSGather.h | 1 - .../ObjectStorages/DiskObjectStorageMetadata.cpp | 2 +- src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp | 2 +- src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.h | 2 -- 5 files changed, 10 insertions(+), 9 deletions(-) diff --git a/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp b/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp index b53ba0f8e29..1a4ae3f963f 100644 --- a/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp +++ b/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp @@ -84,16 +84,20 @@ SeekableReadBufferPtr ReadBufferFromS3Gather::createImplementationBufferImpl(con #if USE_AZURE_BLOB_STORAGE 
SeekableReadBufferPtr ReadBufferFromAzureBlobStorageGather::createImplementationBufferImpl(const String & path, size_t /* file_size */) { - current_file_path = path; - return std::make_unique(blob_container_client, path, max_single_read_retries, - max_single_download_retries, settings.remote_fs_buffer_size, /* use_external_buffer */true, read_until_position); + return std::make_unique( + blob_container_client, + path, + max_single_read_retries, + max_single_download_retries, + settings.remote_fs_buffer_size, + /* use_external_buffer */true, + read_until_position); } #endif SeekableReadBufferPtr ReadBufferFromWebServerGather::createImplementationBufferImpl(const String & path, size_t /* file_size */) { - current_file_path = path; return std::make_unique( fs::path(uri) / path, context, diff --git a/src/Disks/IO/ReadBufferFromRemoteFSGather.h b/src/Disks/IO/ReadBufferFromRemoteFSGather.h index e282a2cfc6c..6a39f4add46 100644 --- a/src/Disks/IO/ReadBufferFromRemoteFSGather.h +++ b/src/Disks/IO/ReadBufferFromRemoteFSGather.h @@ -188,7 +188,6 @@ class ReadBufferFromHDFSGather final : public ReadBufferFromRemoteFSGather public: ReadBufferFromHDFSGather( const Poco::Util::AbstractConfiguration & config_, - const String &, const PathsWithSize & blobs_to_read_, const ReadSettings & settings_) : ReadBufferFromRemoteFSGather(blobs_to_read_, settings_) diff --git a/src/Disks/ObjectStorages/DiskObjectStorageMetadata.cpp b/src/Disks/ObjectStorages/DiskObjectStorageMetadata.cpp index 9ac04809b97..4564e84316d 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorageMetadata.cpp +++ b/src/Disks/ObjectStorages/DiskObjectStorageMetadata.cpp @@ -120,7 +120,7 @@ DiskObjectStorageMetadata::DiskObjectStorageMetadata( void DiskObjectStorageMetadata::addObject(const String & path, size_t size) { - if (path.starts_with(remote_fs_root_path)) + if (!remote_fs_root_path.empty() && path.starts_with(remote_fs_root_path)) throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected relative path"); 
total_size += size; diff --git a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp index 024d1e84ef0..82c700e1a63 100644 --- a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp +++ b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp @@ -54,7 +54,7 @@ std::unique_ptr HDFSObjectStorage::readObjects( /// NOLI std::optional, std::optional) const { - auto hdfs_impl = std::make_unique(config, hdfs_root_path, paths_to_read, read_settings); + auto hdfs_impl = std::make_unique(config, paths_to_read, read_settings); auto buf = std::make_unique(std::move(hdfs_impl)); return std::make_unique(std::move(buf), settings->min_bytes_for_seek); } diff --git a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.h b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.h index 221a77ff08b..28f553906ea 100644 --- a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.h +++ b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.h @@ -50,7 +50,6 @@ public: , hdfs_builder(createHDFSBuilder(hdfs_root_path_, config)) , hdfs_fs(createHDFSFS(hdfs_builder.get())) , settings(std::move(settings_)) - , hdfs_root_path(hdfs_root_path_) {} bool exists(const std::string & hdfs_uri) const override; @@ -118,7 +117,6 @@ private: HDFSFSPtr hdfs_fs; SettingsPtr settings; - String hdfs_root_path; }; } From cbcf51530643f332a944902e91a71a6a19537de1 Mon Sep 17 00:00:00 2001 From: San Date: Mon, 27 Jun 2022 12:06:16 +1000 Subject: [PATCH 100/101] Update SECURITY.md.sh Update Security.md generation code to include bugcrowd program information. 
--- utils/security-generator/SECURITY.md.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/utils/security-generator/SECURITY.md.sh b/utils/security-generator/SECURITY.md.sh index 97c696c1227..381f5b4eaa6 100755 --- a/utils/security-generator/SECURITY.md.sh +++ b/utils/security-generator/SECURITY.md.sh @@ -56,7 +56,7 @@ echo " We're extremely grateful for security researchers and users that report vulnerabilities to the ClickHouse Open Source Community. All reports are thoroughly investigated by developers. -To report a potential vulnerability in ClickHouse please send the details about it to [security@clickhouse.com](mailto:security@clickhouse.com). +To report a potential vulnerability in ClickHouse please send the details about it to [security@clickhouse.com](mailto:security@clickhouse.com). We do not offer any financial rewards for reporting issues to us using this method. Alternatively, you can also submit your findings through our public bug bounty program hosted by [Bugcrowd](https://bugcrowd.com/clickhouse) and be rewarded for it as per the program scope and rules of engagement. ### When Should I Report a Vulnerability? @@ -76,5 +76,5 @@ As the security issue moves from triage, to identified fix, to release planning ## Public Disclosure Timing -A public disclosure date is negotiated by the ClickHouse maintainers and the bug submitter. We prefer to fully disclose the bug as soon as possible once a user mitigation is available. It is reasonable to delay disclosure when the bug or the fix is not yet fully understood, the solution is not well-tested, or for vendor coordination. The timeframe for disclosure is from immediate (especially if it's already publicly known) to 90 days. For a vulnerability with a straightforward mitigation, we expect report date to disclosure date to be on the order of 7 days. +A public disclosure date is negotiated by the ClickHouse maintainers and the bug submitter. 
We prefer to fully disclose the bug as soon as possible once a user mitigation is available. It is reasonable to delay disclosure when the bug or the fix is not yet fully understood, the solution is not well-tested, or for vendor coordination. The timeframe for disclosure is from immediate (especially if it's already publicly known) to 90 days. For a vulnerability with a straightforward mitigation, we expect the report date to disclosure date to be on the order of 7 days. " From 2487ba7f0006f1f020f90424aff727de57397d65 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Mon, 27 Jun 2022 13:16:52 +0200 Subject: [PATCH 101/101] Move `updateInputStream` to `ITransformingStep` (#37393) --- src/Interpreters/ActionsDAG.cpp | 17 +-- src/Interpreters/AggregateDescription.cpp | 27 ----- src/Interpreters/AggregateDescription.h | 4 +- src/Interpreters/Aggregator.cpp | 80 ++++++------ src/Interpreters/Aggregator.h | 49 +++----- src/Interpreters/InterpreterSelectQuery.cpp | 114 +++++++----------- src/Processors/QueryPlan/AggregatingStep.cpp | 30 +++-- src/Processors/QueryPlan/AggregatingStep.h | 13 +- src/Processors/QueryPlan/ArrayJoinStep.cpp | 27 +---- src/Processors/QueryPlan/ArrayJoinStep.h | 7 +- src/Processors/QueryPlan/CreatingSetsStep.cpp | 5 + src/Processors/QueryPlan/CreatingSetsStep.h | 2 + src/Processors/QueryPlan/CubeStep.cpp | 25 ++-- src/Processors/QueryPlan/CubeStep.h | 7 +- src/Processors/QueryPlan/DistinctStep.cpp | 17 +++ src/Processors/QueryPlan/DistinctStep.h | 2 + src/Processors/QueryPlan/ExpressionStep.cpp | 19 +-- src/Processors/QueryPlan/ExpressionStep.h | 4 +- src/Processors/QueryPlan/ExtremesStep.h | 6 + src/Processors/QueryPlan/FillingStep.cpp | 8 ++ src/Processors/QueryPlan/FillingStep.h | 2 + src/Processors/QueryPlan/FilterStep.cpp | 28 ++--- src/Processors/QueryPlan/FilterStep.h | 4 +- src/Processors/QueryPlan/ITransformingStep.h | 15 +++ src/Processors/QueryPlan/JoinStep.cpp | 15 +++ src/Processors/QueryPlan/JoinStep.h | 4 + 
src/Processors/QueryPlan/LimitByStep.h | 5 + src/Processors/QueryPlan/LimitStep.cpp | 7 -- src/Processors/QueryPlan/LimitStep.h | 8 +- .../QueryPlan/MergingAggregatedStep.cpp | 33 +++-- .../QueryPlan/MergingAggregatedStep.h | 9 +- src/Processors/QueryPlan/OffsetStep.h | 5 + .../Optimizations/filterPushDown.cpp | 67 ++++++---- .../Optimizations/liftUpArrayJoin.cpp | 26 +--- .../Optimizations/liftUpFunctions.cpp | 2 - src/Processors/QueryPlan/RollupStep.cpp | 25 +++- src/Processors/QueryPlan/RollupStep.h | 8 +- src/Processors/QueryPlan/SortingStep.cpp | 11 +- src/Processors/QueryPlan/SortingStep.h | 4 +- src/Processors/QueryPlan/TotalsHavingStep.cpp | 39 ++++-- src/Processors/QueryPlan/TotalsHavingStep.h | 23 ++-- src/Processors/QueryPlan/WindowStep.cpp | 33 ++--- src/Processors/QueryPlan/WindowStep.h | 3 +- .../TTL/TTLAggregationAlgorithm.cpp | 31 ++--- .../AggregatingInOrderTransform.cpp | 13 +- .../Transforms/AggregatingTransform.cpp | 4 +- .../Transforms/AggregatingTransform.h | 13 +- src/Processors/Transforms/CubeTransform.cpp | 5 +- src/Processors/Transforms/CubeTransform.h | 2 +- src/Processors/Transforms/RollupTransform.cpp | 4 +- src/Processors/Transforms/RollupTransform.h | 2 +- .../MergeTree/MergeTreeDataSelectExecutor.cpp | 49 +++----- .../01763_filter_push_down_bugs.reference | 1 + .../01763_filter_push_down_bugs.sql | 4 + .../0_stateless/01823_explain_json.reference | 6 +- .../queries/0_stateless/01823_explain_json.sh | 2 +- 56 files changed, 503 insertions(+), 472 deletions(-) diff --git a/src/Interpreters/ActionsDAG.cpp b/src/Interpreters/ActionsDAG.cpp index eb073ee8752..2703773f464 100644 --- a/src/Interpreters/ActionsDAG.cpp +++ b/src/Interpreters/ActionsDAG.cpp @@ -1786,7 +1786,9 @@ ActionsDAGPtr ActionsDAG::cloneActionsForConjunction(NodeRawConstPtrs conjunctio actions->inputs.push_back(input); } - actions->index.push_back(input); + /// We should not add result_predicate into the index for the second time. 
+ if (input->result_name != result_predicate->result_name) + actions->index.push_back(input); } return actions; @@ -1840,13 +1842,14 @@ ActionsDAGPtr ActionsDAG::cloneActionsForFilterPushDown( if (can_remove_filter) { /// If filter column is not needed, remove it from index. - for (auto i = index.begin(); i != index.end(); ++i) + std::erase_if(index, [&](const Node * node) { return node == predicate; }); + + /// At the very end of this method we'll call removeUnusedActions() with allow_remove_inputs=false, + /// so we need to manually remove predicate if it is an input node. + if (predicate->type == ActionType::INPUT) { - if (*i == predicate) - { - index.erase(i); - break; - } + std::erase_if(inputs, [&](const Node * node) { return node == predicate; }); + nodes.remove_if([&](const Node & node) { return &node == predicate; }); } } else diff --git a/src/Interpreters/AggregateDescription.cpp b/src/Interpreters/AggregateDescription.cpp index 1a0748b5f97..b0f51ea7c90 100644 --- a/src/Interpreters/AggregateDescription.cpp +++ b/src/Interpreters/AggregateDescription.cpp @@ -82,24 +82,6 @@ void AggregateDescription::explain(WriteBuffer & out, size_t indent) const } out << "\n"; } - - out << prefix << " Argument positions: "; - - if (arguments.empty()) - out << "none\n"; - else - { - bool first = true; - for (auto arg : arguments) - { - if (!first) - out << ", "; - first = false; - - out << arg; - } - out << '\n'; - } } void AggregateDescription::explain(JSONBuilder::JSONMap & map) const @@ -137,15 +119,6 @@ void AggregateDescription::explain(JSONBuilder::JSONMap & map) const args_array->add(name); map.add("Arguments", std::move(args_array)); - - if (!arguments.empty()) - { - auto args_pos_array = std::make_unique(); - for (auto pos : arguments) - args_pos_array->add(pos); - - map.add("Argument Positions", std::move(args_pos_array)); - } } } diff --git a/src/Interpreters/AggregateDescription.h b/src/Interpreters/AggregateDescription.h index 12c14f7a57c..8c3302a8b0b 100644 
--- a/src/Interpreters/AggregateDescription.h +++ b/src/Interpreters/AggregateDescription.h @@ -14,8 +14,7 @@ struct AggregateDescription { AggregateFunctionPtr function; Array parameters; /// Parameters of the (parametric) aggregate function. - ColumnNumbers arguments; - Names argument_names; /// used if no `arguments` are specified. + Names argument_names; String column_name; /// What name to use for a column with aggregate function values void explain(WriteBuffer & out, size_t indent) const; /// Get description for EXPLAIN query. @@ -23,5 +22,4 @@ struct AggregateDescription }; using AggregateDescriptions = std::vector; - } diff --git a/src/Interpreters/Aggregator.cpp b/src/Interpreters/Aggregator.cpp index 8a93dc5fd77..511e5c9e031 100644 --- a/src/Interpreters/Aggregator.cpp +++ b/src/Interpreters/Aggregator.cpp @@ -260,6 +260,14 @@ auto constructWithReserveIfPossible(size_t size_hint) else return std::make_unique(); } + +DB::ColumnNumbers calculateKeysPositions(const DB::Block & header, const DB::Aggregator::Params & params) +{ + DB::ColumnNumbers keys_positions(params.keys_size); + for (size_t i = 0; i < params.keys_size; ++i) + keys_positions[i] = header.getPositionByName(params.keys[i]); + return keys_positions; +} } namespace DB @@ -356,21 +364,17 @@ Aggregator::Params::StatsCollectingParams::StatsCollectingParams( Block Aggregator::getHeader(bool final) const { - return params.getHeader(final); + return params.getHeader(header, final); } Block Aggregator::Params::getHeader( - const Block & src_header, - const Block & intermediate_header, - const ColumnNumbers & keys, - const AggregateDescriptions & aggregates, - bool final) + const Block & header, bool only_merge, const Names & keys, const AggregateDescriptions & aggregates, bool final) { Block res; - if (intermediate_header) + if (only_merge) { - res = intermediate_header.cloneEmpty(); + res = header.cloneEmpty(); if (final) { @@ -386,14 +390,14 @@ Block Aggregator::Params::getHeader( else { for (const 
auto & key : keys) - res.insert(src_header.safeGetByPosition(key).cloneEmpty()); + res.insert(header.getByName(key).cloneEmpty()); for (const auto & aggregate : aggregates) { - size_t arguments_size = aggregate.arguments.size(); + size_t arguments_size = aggregate.argument_names.size(); DataTypes argument_types(arguments_size); for (size_t j = 0; j < arguments_size; ++j) - argument_types[j] = src_header.safeGetByPosition(aggregate.arguments[j]).type; + argument_types[j] = header.getByName(aggregate.argument_names[j]).type; DataTypePtr type; if (final) @@ -434,9 +438,6 @@ Aggregator::AggregateColumnsConstData Aggregator::Params::makeAggregateColumnsDa void Aggregator::Params::explain(WriteBuffer & out, size_t indent) const { Strings res; - const auto & header = src_header ? src_header - : intermediate_header; - String prefix(indent, ' '); { @@ -444,16 +445,13 @@ void Aggregator::Params::explain(WriteBuffer & out, size_t indent) const out << prefix << "Keys: "; bool first = true; - for (auto key : keys) + for (const auto & key : keys) { if (!first) out << ", "; first = false; - if (key >= header.columns()) - out << "unknown position " << key; - else - out << header.getByPosition(key).name; + out << key; } out << '\n'; @@ -470,18 +468,10 @@ void Aggregator::Params::explain(WriteBuffer & out, size_t indent) const void Aggregator::Params::explain(JSONBuilder::JSONMap & map) const { - const auto & header = src_header ? 
src_header - : intermediate_header; - auto keys_array = std::make_unique(); - for (auto key : keys) - { - if (key >= header.columns()) - keys_array->add(""); - else - keys_array->add(header.getByPosition(key).name); - } + for (const auto & key : keys) + keys_array->add(key); map.add("Keys", std::move(keys_array)); @@ -526,7 +516,8 @@ public: #endif -Aggregator::Aggregator(const Params & params_) : params(params_) +Aggregator::Aggregator(const Block & header_, const Params & params_) + : header(header_), keys_positions(calculateKeysPositions(header, params_)), params(params_) { /// Use query-level memory tracker if (auto * memory_tracker_child = CurrentThread::getMemoryTracker()) @@ -672,9 +663,9 @@ AggregatedDataVariants::Type Aggregator::chooseAggregationMethod() bool has_nullable_key = false; bool has_low_cardinality = false; - for (const auto & pos : params.keys) + for (const auto & key : params.keys) { - DataTypePtr type = (params.src_header ? params.src_header : params.intermediate_header).safeGetByPosition(pos).type; + DataTypePtr type = header.getByName(key).type; if (type->lowCardinality()) { @@ -1277,11 +1268,15 @@ void NO_INLINE Aggregator::mergeOnIntervalWithoutKeyImpl( } -void Aggregator::prepareAggregateInstructions(Columns columns, AggregateColumns & aggregate_columns, Columns & materialized_columns, - AggregateFunctionInstructions & aggregate_functions_instructions, NestedColumnsHolder & nested_columns_holder) const +void Aggregator::prepareAggregateInstructions( + Columns columns, + AggregateColumns & aggregate_columns, + Columns & materialized_columns, + AggregateFunctionInstructions & aggregate_functions_instructions, + NestedColumnsHolder & nested_columns_holder) const { for (size_t i = 0; i < params.aggregates_size; ++i) - aggregate_columns[i].resize(params.aggregates[i].arguments.size()); + aggregate_columns[i].resize(params.aggregates[i].argument_names.size()); aggregate_functions_instructions.resize(params.aggregates_size + 1); 
aggregate_functions_instructions[params.aggregates_size].that = nullptr; @@ -1293,7 +1288,8 @@ void Aggregator::prepareAggregateInstructions(Columns columns, AggregateColumns for (size_t j = 0; j < aggregate_columns[i].size(); ++j) { - materialized_columns.push_back(columns.at(params.aggregates[i].arguments[j])->convertToFullColumnIfConst()); + const auto pos = header.getPositionByName(params.aggregates[i].argument_names[j]); + materialized_columns.push_back(columns.at(pos)->convertToFullColumnIfConst()); aggregate_columns[i][j] = materialized_columns.back().get(); auto full_column = allow_sparse_arguments @@ -1382,7 +1378,7 @@ bool Aggregator::executeOnBlock(Columns columns, /// Remember the columns we will work with for (size_t i = 0; i < params.keys_size; ++i) { - materialized_columns.push_back(recursiveRemoveSparse(columns.at(params.keys[i]))->convertToFullColumnIfConst()); + materialized_columns.push_back(recursiveRemoveSparse(columns.at(keys_positions[i]))->convertToFullColumnIfConst()); key_columns[i] = materialized_columns.back().get(); if (!result.isLowCardinality()) @@ -1954,11 +1950,11 @@ Block Aggregator::prepareBlockAndFill( MutableColumns final_aggregate_columns(params.aggregates_size); AggregateColumnsData aggregate_columns_data(params.aggregates_size); - Block header = getHeader(final); + Block res_header = getHeader(final); for (size_t i = 0; i < params.keys_size; ++i) { - key_columns[i] = header.safeGetByPosition(i).type->createColumn(); + key_columns[i] = res_header.safeGetByPosition(i).type->createColumn(); key_columns[i]->reserve(rows); } @@ -1967,7 +1963,7 @@ Block Aggregator::prepareBlockAndFill( if (!final) { const auto & aggregate_column_name = params.aggregates[i].column_name; - aggregate_columns[i] = header.getByName(aggregate_column_name).type->createColumn(); + aggregate_columns[i] = res_header.getByName(aggregate_column_name).type->createColumn(); /// The ColumnAggregateFunction column captures the shared ownership of the arena with 
the aggregate function states. ColumnAggregateFunction & column_aggregate_func = assert_cast(*aggregate_columns[i]); @@ -2003,7 +1999,7 @@ Block Aggregator::prepareBlockAndFill( filler(key_columns, aggregate_columns_data, final_aggregate_columns, final); - Block res = header.cloneEmpty(); + Block res = res_header.cloneEmpty(); for (size_t i = 0; i < params.keys_size; ++i) res.getByPosition(i).column = std::move(key_columns[i]); @@ -2018,7 +2014,7 @@ Block Aggregator::prepareBlockAndFill( } /// Change the size of the columns-constants in the block. - size_t columns = header.columns(); + size_t columns = res_header.columns(); for (size_t i = 0; i < columns; ++i) if (isColumnConst(*res.getByPosition(i).column)) res.getByPosition(i).column = res.getByPosition(i).column->cut(0, rows); diff --git a/src/Interpreters/Aggregator.h b/src/Interpreters/Aggregator.h index 475fcd9e249..feb07727725 100644 --- a/src/Interpreters/Aggregator.h +++ b/src/Interpreters/Aggregator.h @@ -897,13 +897,8 @@ public: struct Params { - /// Data structure of source blocks. - Block src_header; - /// Data structure of intermediate blocks before merge. - Block intermediate_header; - /// What to count. 
- const ColumnNumbers keys; + const Names keys; const AggregateDescriptions aggregates; const size_t keys_size; const size_t aggregates_size; @@ -937,6 +932,8 @@ public: bool compile_aggregate_expressions; size_t min_count_to_compile_aggregate_expression; + bool only_merge; + struct StatsCollectingParams { StatsCollectingParams(); @@ -957,8 +954,7 @@ public: StatsCollectingParams stats_collecting_params; Params( - const Block & src_header_, - const ColumnNumbers & keys_, + const Names & keys_, const AggregateDescriptions & aggregates_, bool overflow_row_, size_t max_rows_to_group_by_, @@ -972,11 +968,9 @@ public: size_t min_free_disk_space_, bool compile_aggregate_expressions_, size_t min_count_to_compile_aggregate_expression_, - const Block & intermediate_header_ = {}, + bool only_merge_ = false, // true for projections const StatsCollectingParams & stats_collecting_params_ = {}) - : src_header(src_header_) - , intermediate_header(intermediate_header_) - , keys(keys_) + : keys(keys_) , aggregates(aggregates_) , keys_size(keys.size()) , aggregates_size(aggregates.size()) @@ -992,33 +986,22 @@ public: , min_free_disk_space(min_free_disk_space_) , compile_aggregate_expressions(compile_aggregate_expressions_) , min_count_to_compile_aggregate_expression(min_count_to_compile_aggregate_expression_) + , only_merge(only_merge_) , stats_collecting_params(stats_collecting_params_) { } /// Only parameters that matter during merge. 
- Params( - const Block & intermediate_header_, - const ColumnNumbers & keys_, - const AggregateDescriptions & aggregates_, - bool overflow_row_, - size_t max_threads_) - : Params(Block(), keys_, aggregates_, overflow_row_, 0, OverflowMode::THROW, 0, 0, 0, false, nullptr, max_threads_, 0, false, 0, {}, {}) + Params(const Names & keys_, const AggregateDescriptions & aggregates_, bool overflow_row_, size_t max_threads_) + : Params( + keys_, aggregates_, overflow_row_, 0, OverflowMode::THROW, 0, 0, 0, false, nullptr, max_threads_, 0, false, 0, true, {}) { - intermediate_header = intermediate_header_; } - static Block getHeader( - const Block & src_header, - const Block & intermediate_header, - const ColumnNumbers & keys, - const AggregateDescriptions & aggregates, - bool final); + static Block + getHeader(const Block & header, bool only_merge, const Names & keys, const AggregateDescriptions & aggregates, bool final); - Block getHeader(bool final) const - { - return getHeader(src_header, intermediate_header, keys, aggregates, final); - } + Block getHeader(const Block & header_, bool final) const { return getHeader(header_, only_merge, keys, aggregates, final); } /// Remember the columns we will work with ColumnRawPtrs makeRawKeyColumns(const Block & block) const; @@ -1029,7 +1012,7 @@ public: void explain(JSONBuilder::JSONMap & map) const; }; - explicit Aggregator(const Params & params_); + explicit Aggregator(const Block & header_, const Params & params_); /// Process one block. Return false if the processing should be aborted (with group_by_overflow_mode = 'break'). bool executeOnBlock(const Block & block, @@ -1106,6 +1089,10 @@ private: friend class ConvertingAggregatedToChunksSource; friend class AggregatingInOrderTransform; + /// Data structure of source blocks. + Block header; + /// Positions of aggregation key columns in the header. 
+ const ColumnNumbers keys_positions; Params params; AggregatedDataVariants::Type method_chosen; diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 77ed83446b1..047e50272fe 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -1592,13 +1592,9 @@ static void executeMergeAggregatedImpl( const NamesAndTypesList & aggregation_keys, const AggregateDescriptions & aggregates) { - const auto & header_before_merge = query_plan.getCurrentDataStream().header; - - ColumnNumbers keys; + auto keys = aggregation_keys.getNames(); if (has_grouping_sets) - keys.push_back(header_before_merge.getPositionByName("__grouping_set")); - for (const auto & key : aggregation_keys) - keys.push_back(header_before_merge.getPositionByName(key.name)); + keys.insert(keys.begin(), "__grouping_set"); /** There are two modes of distributed aggregation. * @@ -1615,16 +1611,12 @@ static void executeMergeAggregatedImpl( * but it can work more slowly. 
*/ - Aggregator::Params params(header_before_merge, keys, aggregates, overflow_row, settings.max_threads); - - auto transform_params = std::make_shared( - params, - final, - /* only_merge_= */ false); + Aggregator::Params params(keys, aggregates, overflow_row, settings.max_threads); auto merging_aggregated = std::make_unique( query_plan.getCurrentDataStream(), - std::move(transform_params), + params, + final, settings.distributed_aggregation_memory_efficient && is_remote_storage, settings.max_threads, settings.aggregation_memory_efficient_merge_threads); @@ -2174,11 +2166,12 @@ static Aggregator::Params getAggregatorParams( const ASTPtr & query_ptr, const SelectQueryExpressionAnalyzer & query_analyzer, const Context & context, - const Block & current_data_stream_header, - const ColumnNumbers & keys, + const Names & keys, const AggregateDescriptions & aggregates, - bool overflow_row, const Settings & settings, - size_t group_by_two_level_threshold, size_t group_by_two_level_threshold_bytes) + bool overflow_row, + const Settings & settings, + size_t group_by_two_level_threshold, + size_t group_by_two_level_threshold_bytes) { const auto stats_collecting_params = Aggregator::Params::StatsCollectingParams( query_ptr, @@ -2186,8 +2179,8 @@ static Aggregator::Params getAggregatorParams( settings.max_entries_for_hash_table_stats, settings.max_size_to_preallocate_for_aggregation); - return Aggregator::Params{ - current_data_stream_header, + return Aggregator::Params + { keys, aggregates, overflow_row, @@ -2204,42 +2197,30 @@ static Aggregator::Params getAggregatorParams( settings.min_free_disk_space_for_temporary_data, settings.compile_aggregate_expressions, settings.min_count_to_compile_aggregate_expression, - Block{}, + /* only_merge */ false, stats_collecting_params }; } -static GroupingSetsParamsList getAggregatorGroupingSetsParams( - const SelectQueryExpressionAnalyzer & query_analyzer, - const Block & header_before_aggregation, - const ColumnNumbers & all_keys -) 
+static GroupingSetsParamsList getAggregatorGroupingSetsParams(const SelectQueryExpressionAnalyzer & query_analyzer, const Names & all_keys) { GroupingSetsParamsList result; if (query_analyzer.useGroupingSetKey()) { auto const & aggregation_keys_list = query_analyzer.aggregationKeysList(); - ColumnNumbersList grouping_sets_with_keys; - ColumnNumbersList missing_columns_per_set; - for (const auto & aggregation_keys : aggregation_keys_list) { - ColumnNumbers keys; - std::unordered_set keys_set; + NameSet keys; for (const auto & key : aggregation_keys) - { - keys.push_back(header_before_aggregation.getPositionByName(key.name)); - keys_set.insert(keys.back()); - } + keys.insert(key.name); - ColumnNumbers missing_indexes; - for (size_t i = 0; i < all_keys.size(); ++i) - { - if (!keys_set.contains(all_keys[i])) - missing_indexes.push_back(i); - } - result.emplace_back(std::move(keys), std::move(missing_indexes)); + Names missing_keys; + for (const auto & key : all_keys) + if (!keys.contains(key)) + missing_keys.push_back(key); + + result.emplace_back(aggregation_keys.getNames(), std::move(missing_keys)); } } return result; @@ -2254,24 +2235,24 @@ void InterpreterSelectQuery::executeAggregation(QueryPlan & query_plan, const Ac if (options.is_projection_query) return; - const auto & header_before_aggregation = query_plan.getCurrentDataStream().header; - AggregateDescriptions aggregates = query_analyzer->aggregates(); - for (auto & descr : aggregates) - if (descr.arguments.empty()) - for (const auto & name : descr.argument_names) - descr.arguments.push_back(header_before_aggregation.getPositionByName(name)); const Settings & settings = context->getSettingsRef(); - ColumnNumbers keys; - for (const auto & key : query_analyzer->aggregationKeys()) - keys.push_back(header_before_aggregation.getPositionByName(key.name)); + const auto & keys = query_analyzer->aggregationKeys().getNames(); - auto aggregator_params = getAggregatorParams(query_ptr, *query_analyzer, *context, 
header_before_aggregation, keys, aggregates, overflow_row, settings, - settings.group_by_two_level_threshold, settings.group_by_two_level_threshold_bytes); + auto aggregator_params = getAggregatorParams( + query_ptr, + *query_analyzer, + *context, + keys, + aggregates, + overflow_row, + settings, + settings.group_by_two_level_threshold, + settings.group_by_two_level_threshold_bytes); - auto grouping_sets_params = getAggregatorGroupingSetsParams(*query_analyzer, header_before_aggregation, keys); + auto grouping_sets_params = getAggregatorGroupingSetsParams(*query_analyzer, keys); SortDescription group_by_sort_description; @@ -2292,7 +2273,6 @@ void InterpreterSelectQuery::executeAggregation(QueryPlan & query_plan, const Ac std::move(aggregator_params), std::move(grouping_sets_params), final, - /* only_merge_= */ false, settings.max_block_size, settings.aggregation_in_order_max_block_bytes, merge_threads, @@ -2339,11 +2319,9 @@ void InterpreterSelectQuery::executeTotalsAndHaving( { const Settings & settings = context->getSettingsRef(); - const auto & header_before = query_plan.getCurrentDataStream().header; - auto totals_having_step = std::make_unique( query_plan.getCurrentDataStream(), - getAggregatesMask(header_before, query_analyzer->aggregates()), + query_analyzer->aggregates(), overflow_row, expression, has_having ? 
getSelectQuery().having()->getColumnName() : "", @@ -2357,25 +2335,23 @@ void InterpreterSelectQuery::executeTotalsAndHaving( void InterpreterSelectQuery::executeRollupOrCube(QueryPlan & query_plan, Modificator modificator) { - const auto & header_before_transform = query_plan.getCurrentDataStream().header; - const Settings & settings = context->getSettingsRef(); - ColumnNumbers keys; - for (const auto & key : query_analyzer->aggregationKeys()) - keys.push_back(header_before_transform.getPositionByName(key.name)); + const auto & keys = query_analyzer->aggregationKeys().getNames(); - auto params = getAggregatorParams(query_ptr, *query_analyzer, *context, header_before_transform, keys, query_analyzer->aggregates(), false, settings, 0, 0); - auto transform_params = std::make_shared( - std::move(params), - /* final_= */ true, - /* only_merge_= */ false); + // Arguments will not be present in Rollup / Cube input header and they don't actually needed 'cause these steps will work with AggregateFunctionState-s anyway. 
+ auto aggregates = query_analyzer->aggregates(); + for (auto & aggregate : aggregates) + aggregate.argument_names.clear(); + + auto params = getAggregatorParams(query_ptr, *query_analyzer, *context, keys, aggregates, false, settings, 0, 0); + const bool final = true; QueryPlanStepPtr step; if (modificator == Modificator::ROLLUP) - step = std::make_unique(query_plan.getCurrentDataStream(), std::move(transform_params)); + step = std::make_unique(query_plan.getCurrentDataStream(), std::move(params), final); else if (modificator == Modificator::CUBE) - step = std::make_unique(query_plan.getCurrentDataStream(), std::move(transform_params)); + step = std::make_unique(query_plan.getCurrentDataStream(), std::move(params), final); query_plan.addStep(std::move(step)); } diff --git a/src/Processors/QueryPlan/AggregatingStep.cpp b/src/Processors/QueryPlan/AggregatingStep.cpp index 28f821d6f3f..1e62673bc26 100644 --- a/src/Processors/QueryPlan/AggregatingStep.cpp +++ b/src/Processors/QueryPlan/AggregatingStep.cpp @@ -69,7 +69,6 @@ AggregatingStep::AggregatingStep( Aggregator::Params params_, GroupingSetsParamsList grouping_sets_params_, bool final_, - bool only_merge_, size_t max_block_size_, size_t aggregation_in_order_max_block_bytes_, size_t merge_threads_, @@ -77,11 +76,11 @@ AggregatingStep::AggregatingStep( bool storage_has_evenly_distributed_read_, InputOrderInfoPtr group_by_info_, SortDescription group_by_sort_description_) - : ITransformingStep(input_stream_, appendGroupingColumn(params_.getHeader(final_), grouping_sets_params_), getTraits(), false) + : ITransformingStep( + input_stream_, appendGroupingColumn(params_.getHeader(input_stream_.header, final_), grouping_sets_params_), getTraits(), false) , params(std::move(params_)) , grouping_sets_params(std::move(grouping_sets_params_)) , final(final_) - , only_merge(only_merge_) , max_block_size(max_block_size_) , aggregation_in_order_max_block_bytes(aggregation_in_order_max_block_bytes_) , 
merge_threads(merge_threads_) @@ -121,7 +120,8 @@ void AggregatingStep::transformPipeline(QueryPipelineBuilder & pipeline, const B * 1. Parallel aggregation is done, and the results should be merged in parallel. * 2. An aggregation is done with store of temporary data on the disk, and they need to be merged in a memory efficient way. */ - auto transform_params = std::make_shared(std::move(params), final, only_merge); + const auto src_header = pipeline.getHeader(); + auto transform_params = std::make_shared(src_header, std::move(params), final); if (!grouping_sets_params.empty()) { @@ -153,7 +153,6 @@ void AggregatingStep::transformPipeline(QueryPipelineBuilder & pipeline, const B { Aggregator::Params params_for_set { - transform_params->params.src_header, grouping_sets_params[i].used_keys, transform_params->params.aggregates, transform_params->params.overflow_row, @@ -168,10 +167,9 @@ void AggregatingStep::transformPipeline(QueryPipelineBuilder & pipeline, const B transform_params->params.min_free_disk_space, transform_params->params.compile_aggregate_expressions, transform_params->params.min_count_to_compile_aggregate_expression, - transform_params->params.intermediate_header, - transform_params->params.stats_collecting_params - }; - auto transform_params_for_set = std::make_shared(std::move(params_for_set), final, only_merge); + /* only_merge */ false, + transform_params->params.stats_collecting_params}; + auto transform_params_for_set = std::make_shared(src_header, std::move(params_for_set), final); if (streams > 1) { @@ -234,15 +232,15 @@ void AggregatingStep::transformPipeline(QueryPipelineBuilder & pipeline, const B grouping_node = &dag->materializeNode(*grouping_node); index.push_back(grouping_node); - size_t missign_column_index = 0; const auto & missing_columns = grouping_sets_params[set_counter].missing_keys; for (size_t i = 0; i < output_header.columns(); ++i) { auto & col = output_header.getByPosition(i); - if (missign_column_index < 
missing_columns.size() && missing_columns[missign_column_index] == i) + const auto it = std::find_if( + missing_columns.begin(), missing_columns.end(), [&](const auto & missing_col) { return missing_col == col.name; }); + if (it != missing_columns.end()) { - ++missign_column_index; auto column_with_default = col.column->cloneEmpty(); col.type->insertDefaultInto(*column_with_default); auto column = ColumnConst::create(std::move(column_with_default), 0); @@ -391,4 +389,12 @@ void AggregatingStep::describePipeline(FormatSettings & settings) const } } +void AggregatingStep::updateOutputStream() +{ + output_stream = createOutputStream( + input_streams.front(), + appendGroupingColumn(params.getHeader(input_streams.front().header, final), grouping_sets_params), + getDataStreamTraits()); +} + } diff --git a/src/Processors/QueryPlan/AggregatingStep.h b/src/Processors/QueryPlan/AggregatingStep.h index 1be74da583a..2879cd1e0e9 100644 --- a/src/Processors/QueryPlan/AggregatingStep.h +++ b/src/Processors/QueryPlan/AggregatingStep.h @@ -11,13 +11,10 @@ struct GroupingSetsParams { GroupingSetsParams() = default; - GroupingSetsParams(ColumnNumbers used_keys_, ColumnNumbers missing_keys_) - : used_keys(std::move(used_keys_)) - , missing_keys(std::move(missing_keys_)) - {} + GroupingSetsParams(Names used_keys_, Names missing_keys_) : used_keys(std::move(used_keys_)), missing_keys(std::move(missing_keys_)) { } - ColumnNumbers used_keys; - ColumnNumbers missing_keys; + Names used_keys; + Names missing_keys; }; using GroupingSetsParamsList = std::vector; @@ -33,7 +30,6 @@ public: Aggregator::Params params_, GroupingSetsParamsList grouping_sets_params_, bool final_, - bool only_merge_, size_t max_block_size_, size_t aggregation_in_order_max_block_bytes_, size_t merge_threads_, @@ -54,10 +50,11 @@ public: const Aggregator::Params & getParams() const { return params; } private: + void updateOutputStream() override; + Aggregator::Params params; GroupingSetsParamsList grouping_sets_params; 
bool final; - bool only_merge; size_t max_block_size; size_t aggregation_in_order_max_block_bytes; size_t merge_threads; diff --git a/src/Processors/QueryPlan/ArrayJoinStep.cpp b/src/Processors/QueryPlan/ArrayJoinStep.cpp index 3ca5b9109e6..bd1908a4a6d 100644 --- a/src/Processors/QueryPlan/ArrayJoinStep.cpp +++ b/src/Processors/QueryPlan/ArrayJoinStep.cpp @@ -34,40 +34,19 @@ ArrayJoinStep::ArrayJoinStep(const DataStream & input_stream_, ArrayJoinActionPt { } -void ArrayJoinStep::updateInputStream(DataStream input_stream, Block result_header) +void ArrayJoinStep::updateOutputStream() { output_stream = createOutputStream( - input_stream, - ArrayJoinTransform::transformHeader(input_stream.header, array_join), - getDataStreamTraits()); - - input_streams.clear(); - input_streams.emplace_back(std::move(input_stream)); - res_header = std::move(result_header); + input_streams.front(), ArrayJoinTransform::transformHeader(input_streams.front().header, array_join), getDataStreamTraits()); } -void ArrayJoinStep::transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings & settings) +void ArrayJoinStep::transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) { pipeline.addSimpleTransform([&](const Block & header, QueryPipelineBuilder::StreamType stream_type) { bool on_totals = stream_type == QueryPipelineBuilder::StreamType::Totals; return std::make_shared(header, array_join, on_totals); }); - - if (res_header && !blocksHaveEqualStructure(res_header, output_stream->header)) - { - auto actions_dag = ActionsDAG::makeConvertingActions( - pipeline.getHeader().getColumnsWithTypeAndName(), - res_header.getColumnsWithTypeAndName(), - ActionsDAG::MatchColumnsMode::Name); - - auto actions = std::make_shared(actions_dag, settings.getActionsSettings()); - - pipeline.addSimpleTransform([&](const Block & header) - { - return std::make_shared(header, actions); - }); - } } void ArrayJoinStep::describeActions(FormatSettings & settings) 
const diff --git a/src/Processors/QueryPlan/ArrayJoinStep.h b/src/Processors/QueryPlan/ArrayJoinStep.h index 83df4d021e8..2d9b2ebd0c8 100644 --- a/src/Processors/QueryPlan/ArrayJoinStep.h +++ b/src/Processors/QueryPlan/ArrayJoinStep.h @@ -13,18 +13,17 @@ public: explicit ArrayJoinStep(const DataStream & input_stream_, ArrayJoinActionPtr array_join_); String getName() const override { return "ArrayJoin"; } - void transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings & settings) override; + void transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override; void describeActions(JSONBuilder::JSONMap & map) const override; void describeActions(FormatSettings & settings) const override; - void updateInputStream(DataStream input_stream, Block result_header); - const ArrayJoinActionPtr & arrayJoin() const { return array_join; } private: + void updateOutputStream() override; + ArrayJoinActionPtr array_join; - Block res_header; }; } diff --git a/src/Processors/QueryPlan/CreatingSetsStep.cpp b/src/Processors/QueryPlan/CreatingSetsStep.cpp index 6b6f9d361ef..94d841ff095 100644 --- a/src/Processors/QueryPlan/CreatingSetsStep.cpp +++ b/src/Processors/QueryPlan/CreatingSetsStep.cpp @@ -49,6 +49,11 @@ void CreatingSetStep::transformPipeline(QueryPipelineBuilder & pipeline, const B pipeline.addCreatingSetsTransform(getOutputStream().header, std::move(subquery_for_set), network_transfer_limits, getContext()); } +void CreatingSetStep::updateOutputStream() +{ + output_stream = createOutputStream(input_streams.front(), Block{}, getDataStreamTraits()); +} + void CreatingSetStep::describeActions(FormatSettings & settings) const { String prefix(settings.offset, ' '); diff --git a/src/Processors/QueryPlan/CreatingSetsStep.h b/src/Processors/QueryPlan/CreatingSetsStep.h index e20c28e10f4..20cdd24c8a9 100644 --- a/src/Processors/QueryPlan/CreatingSetsStep.h +++ b/src/Processors/QueryPlan/CreatingSetsStep.h @@ -27,6 +27,8 @@ 
public: void describeActions(FormatSettings & settings) const override; private: + void updateOutputStream() override; + String description; SubqueryForSet subquery_for_set; SizeLimits network_transfer_limits; diff --git a/src/Processors/QueryPlan/CubeStep.cpp b/src/Processors/QueryPlan/CubeStep.cpp index 91c85a08412..b0c57491085 100644 --- a/src/Processors/QueryPlan/CubeStep.cpp +++ b/src/Processors/QueryPlan/CubeStep.cpp @@ -24,14 +24,15 @@ static ITransformingStep::Traits getTraits() }; } -CubeStep::CubeStep(const DataStream & input_stream_, AggregatingTransformParamsPtr params_) - : ITransformingStep(input_stream_, appendGroupingSetColumn(params_->getHeader()), getTraits()) - , keys_size(params_->params.keys_size) +CubeStep::CubeStep(const DataStream & input_stream_, Aggregator::Params params_, bool final_) + : ITransformingStep(input_stream_, appendGroupingSetColumn(params_.getHeader(input_stream_.header, final_)), getTraits()) + , keys_size(params_.keys_size) , params(std::move(params_)) + , final(final_) { /// Aggregation keys are distinct - for (auto key : params->params.keys) - output_stream->distinct_columns.insert(params->params.src_header.getByPosition(key).name); + for (const auto & key : params.keys) + output_stream->distinct_columns.insert(key); } ProcessorPtr addGroupingSetForTotals(const Block & header, const BuildQueryPipelineSettings & settings, UInt64 grouping_set_number) @@ -59,13 +60,23 @@ void CubeStep::transformPipeline(QueryPipelineBuilder & pipeline, const BuildQue if (stream_type == QueryPipelineBuilder::StreamType::Totals) return addGroupingSetForTotals(header, settings, (UInt64(1) << keys_size) - 1); - return std::make_shared(header, std::move(params)); + auto transform_params = std::make_shared(header, std::move(params), final); + return std::make_shared(header, std::move(transform_params)); }); } const Aggregator::Params & CubeStep::getParams() const { - return params->params; + return params; } +void CubeStep::updateOutputStream() +{ 
+ output_stream = createOutputStream( + input_streams.front(), appendGroupingSetColumn(params.getHeader(input_streams.front().header, final)), getDataStreamTraits()); + + /// Aggregation keys are distinct + for (const auto & key : params.keys) + output_stream->distinct_columns.insert(key); +} } diff --git a/src/Processors/QueryPlan/CubeStep.h b/src/Processors/QueryPlan/CubeStep.h index d3e26f9379f..87f22de7fc6 100644 --- a/src/Processors/QueryPlan/CubeStep.h +++ b/src/Processors/QueryPlan/CubeStep.h @@ -13,7 +13,7 @@ using AggregatingTransformParamsPtr = std::shared_ptrdistinct_columns.empty() /// Columns already distinct, do nothing + && (!pre_distinct /// Main distinct + || input_streams.front().has_single_port)) /// pre_distinct for single port works as usual one + { + /// Build distinct set. + for (const auto & name : columns) + output_stream->distinct_columns.insert(name); + } +} + } diff --git a/src/Processors/QueryPlan/DistinctStep.h b/src/Processors/QueryPlan/DistinctStep.h index a48a779425d..a4424e01d72 100644 --- a/src/Processors/QueryPlan/DistinctStep.h +++ b/src/Processors/QueryPlan/DistinctStep.h @@ -24,6 +24,8 @@ public: void describeActions(FormatSettings & settings) const override; private: + void updateOutputStream() override; + SizeLimits set_size_limits; UInt64 limit_hint; Names columns; diff --git a/src/Processors/QueryPlan/ExpressionStep.cpp b/src/Processors/QueryPlan/ExpressionStep.cpp index 33d2ad6e1cf..c9f1e1ce242 100644 --- a/src/Processors/QueryPlan/ExpressionStep.cpp +++ b/src/Processors/QueryPlan/ExpressionStep.cpp @@ -38,19 +38,6 @@ ExpressionStep::ExpressionStep(const DataStream & input_stream_, ActionsDAGPtr a updateDistinctColumns(output_stream->header, output_stream->distinct_columns); } -void ExpressionStep::updateInputStream(DataStream input_stream, bool keep_header) -{ - Block out_header = keep_header ? 
std::move(output_stream->header) - : ExpressionTransform::transformHeader(input_stream.header, *actions_dag); - output_stream = createOutputStream( - input_stream, - std::move(out_header), - getDataStreamTraits()); - - input_streams.clear(); - input_streams.emplace_back(std::move(input_stream)); -} - void ExpressionStep::transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings & settings) { auto expression = std::make_shared(actions_dag, settings.getActionsSettings()); @@ -101,4 +88,10 @@ void ExpressionStep::describeActions(JSONBuilder::JSONMap & map) const map.add("Expression", expression->toTree()); } +void ExpressionStep::updateOutputStream() +{ + output_stream = createOutputStream( + input_streams.front(), ExpressionTransform::transformHeader(input_streams.front().header, *actions_dag), getDataStreamTraits()); +} + } diff --git a/src/Processors/QueryPlan/ExpressionStep.h b/src/Processors/QueryPlan/ExpressionStep.h index 94c2ba21bc1..96869a9e9ca 100644 --- a/src/Processors/QueryPlan/ExpressionStep.h +++ b/src/Processors/QueryPlan/ExpressionStep.h @@ -20,8 +20,6 @@ public: void transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings & settings) override; - void updateInputStream(DataStream input_stream, bool keep_header); - void describeActions(FormatSettings & settings) const override; const ActionsDAGPtr & getExpression() const { return actions_dag; } @@ -29,6 +27,8 @@ public: void describeActions(JSONBuilder::JSONMap & map) const override; private: + void updateOutputStream() override; + ActionsDAGPtr actions_dag; }; diff --git a/src/Processors/QueryPlan/ExtremesStep.h b/src/Processors/QueryPlan/ExtremesStep.h index 7898796306c..57ccef077aa 100644 --- a/src/Processors/QueryPlan/ExtremesStep.h +++ b/src/Processors/QueryPlan/ExtremesStep.h @@ -12,6 +12,12 @@ public: String getName() const override { return "Extremes"; } void transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings 
&) override; + +private: + void updateOutputStream() override + { + output_stream = createOutputStream(input_streams.front(), input_streams.front().header, getDataStreamTraits()); + } }; } diff --git a/src/Processors/QueryPlan/FillingStep.cpp b/src/Processors/QueryPlan/FillingStep.cpp index 41bc2f44dba..8a370786820 100644 --- a/src/Processors/QueryPlan/FillingStep.cpp +++ b/src/Processors/QueryPlan/FillingStep.cpp @@ -57,4 +57,12 @@ void FillingStep::describeActions(JSONBuilder::JSONMap & map) const map.add("Sort Description", explainSortDescription(sort_description)); } +void FillingStep::updateOutputStream() +{ + if (!input_streams.front().has_single_port) + throw Exception("FillingStep expects single input", ErrorCodes::LOGICAL_ERROR); + + output_stream = createOutputStream( + input_streams.front(), FillingTransform::transformHeader(input_streams.front().header, sort_description), getDataStreamTraits()); +} } diff --git a/src/Processors/QueryPlan/FillingStep.h b/src/Processors/QueryPlan/FillingStep.h index 0393b2c525b..4e1b5b3654d 100644 --- a/src/Processors/QueryPlan/FillingStep.h +++ b/src/Processors/QueryPlan/FillingStep.h @@ -22,6 +22,8 @@ public: const SortDescription & getSortDescription() const { return sort_description; } private: + void updateOutputStream() override; + SortDescription sort_description; InterpolateDescriptionPtr interpolate_description; }; diff --git a/src/Processors/QueryPlan/FilterStep.cpp b/src/Processors/QueryPlan/FilterStep.cpp index df75c37dc97..ff58abf8874 100644 --- a/src/Processors/QueryPlan/FilterStep.cpp +++ b/src/Processors/QueryPlan/FilterStep.cpp @@ -46,25 +46,6 @@ FilterStep::FilterStep( updateDistinctColumns(output_stream->header, output_stream->distinct_columns); } -void FilterStep::updateInputStream(DataStream input_stream, bool keep_header) -{ - Block out_header = std::move(output_stream->header); - if (keep_header) - out_header = FilterTransform::transformHeader( - input_stream.header, - *actions_dag, - 
filter_column_name, - remove_filter_column); - - output_stream = createOutputStream( - input_stream, - std::move(out_header), - getDataStreamTraits()); - - input_streams.clear(); - input_streams.emplace_back(std::move(input_stream)); -} - void FilterStep::transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings & settings) { auto expression = std::make_shared(actions_dag, settings.getActionsSettings()); @@ -124,4 +105,13 @@ void FilterStep::describeActions(JSONBuilder::JSONMap & map) const map.add("Expression", expression->toTree()); } +void FilterStep::updateOutputStream() +{ + output_stream = createOutputStream( + input_streams.front(), + FilterTransform::transformHeader(input_streams.front().header, *actions_dag, filter_column_name, remove_filter_column), + getDataStreamTraits()); +} + + } diff --git a/src/Processors/QueryPlan/FilterStep.h b/src/Processors/QueryPlan/FilterStep.h index 7ac5bc036e0..daea1e13c58 100644 --- a/src/Processors/QueryPlan/FilterStep.h +++ b/src/Processors/QueryPlan/FilterStep.h @@ -20,8 +20,6 @@ public: String getName() const override { return "Filter"; } void transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings & settings) override; - void updateInputStream(DataStream input_stream, bool keep_header); - void describeActions(JSONBuilder::JSONMap & map) const override; void describeActions(FormatSettings & settings) const override; @@ -30,6 +28,8 @@ public: bool removesFilterColumn() const { return remove_filter_column; } private: + void updateOutputStream() override; + ActionsDAGPtr actions_dag; String filter_column_name; bool remove_filter_column; diff --git a/src/Processors/QueryPlan/ITransformingStep.h b/src/Processors/QueryPlan/ITransformingStep.h index 8f3641dd5bd..008642c71ee 100644 --- a/src/Processors/QueryPlan/ITransformingStep.h +++ b/src/Processors/QueryPlan/ITransformingStep.h @@ -55,6 +55,19 @@ public: const TransformTraits & getTransformTraits() const { return 
transform_traits; } const DataStreamTraits & getDataStreamTraits() const { return data_stream_traits; } + /// Updates the input stream of the given step. Used during query plan optimizations. + /// It won't do any validation of a new stream, so it is your responsibility to ensure that this update doesn't break anything + /// (e.g. you update data stream traits or correctly remove / add columns). + void updateInputStream(DataStream input_stream) + { + input_streams.clear(); + input_streams.emplace_back(std::move(input_stream)); + + updateOutputStream(); + + updateDistinctColumns(output_stream->header, output_stream->distinct_columns); + } + void describePipeline(FormatSettings & settings) const override; /// Append extra processors for this step. @@ -73,6 +86,8 @@ protected: TransformTraits transform_traits; private: + virtual void updateOutputStream() = 0; + /// We collect processors got after pipeline transformation. Processors processors; bool collect_processors; diff --git a/src/Processors/QueryPlan/JoinStep.cpp b/src/Processors/QueryPlan/JoinStep.cpp index 983be9d45fb..209d91af4d8 100644 --- a/src/Processors/QueryPlan/JoinStep.cpp +++ b/src/Processors/QueryPlan/JoinStep.cpp @@ -40,6 +40,14 @@ void JoinStep::describePipeline(FormatSettings & settings) const IQueryPlanStep::describePipeline(processors, settings); } +void JoinStep::updateLeftStream(const DataStream & left_stream_) +{ + input_streams = {left_stream_, input_streams.at(1)}; + output_stream = DataStream{ + .header = JoiningTransform::transformHeader(left_stream_.header, join), + }; +} + static ITransformingStep::Traits getStorageJoinTraits() { return ITransformingStep::Traits @@ -87,4 +95,11 @@ void FilledJoinStep::transformPipeline(QueryPipelineBuilder & pipeline, const Bu }); } +void FilledJoinStep::updateOutputStream() +{ + output_stream = createOutputStream( + input_streams.front(), JoiningTransform::transformHeader(input_streams.front().header, join), getDataStreamTraits()); +} + + } diff --git 
a/src/Processors/QueryPlan/JoinStep.h b/src/Processors/QueryPlan/JoinStep.h index b9d3dff1b65..17a0cc2ae63 100644 --- a/src/Processors/QueryPlan/JoinStep.h +++ b/src/Processors/QueryPlan/JoinStep.h @@ -28,6 +28,8 @@ public: const JoinPtr & getJoin() const { return join; } + void updateLeftStream(const DataStream & left_stream_); + private: JoinPtr join; size_t max_block_size; @@ -47,6 +49,8 @@ public: void transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override; private: + void updateOutputStream() override; + JoinPtr join; size_t max_block_size; }; diff --git a/src/Processors/QueryPlan/LimitByStep.h b/src/Processors/QueryPlan/LimitByStep.h index eb91be8a814..0edda3247d6 100644 --- a/src/Processors/QueryPlan/LimitByStep.h +++ b/src/Processors/QueryPlan/LimitByStep.h @@ -20,6 +20,11 @@ public: void describeActions(FormatSettings & settings) const override; private: + void updateOutputStream() override + { + output_stream = createOutputStream(input_streams.front(), input_streams.front().header, getDataStreamTraits()); + } + size_t group_length; size_t group_offset; Names columns; diff --git a/src/Processors/QueryPlan/LimitStep.cpp b/src/Processors/QueryPlan/LimitStep.cpp index aff7472e4aa..144ac16f0d5 100644 --- a/src/Processors/QueryPlan/LimitStep.cpp +++ b/src/Processors/QueryPlan/LimitStep.cpp @@ -36,13 +36,6 @@ LimitStep::LimitStep( { } -void LimitStep::updateInputStream(DataStream input_stream) -{ - input_streams.clear(); - input_streams.emplace_back(std::move(input_stream)); - output_stream = createOutputStream(input_streams.front(), input_streams.front().header, getDataStreamTraits()); -} - void LimitStep::transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) { auto transform = std::make_shared( diff --git a/src/Processors/QueryPlan/LimitStep.h b/src/Processors/QueryPlan/LimitStep.h index f5bceeb29c7..1ae6b73cc3d 100644 --- a/src/Processors/QueryPlan/LimitStep.h +++ 
b/src/Processors/QueryPlan/LimitStep.h @@ -31,12 +31,14 @@ public: return limit + offset; } - /// Change input stream when limit is pushed up. TODO: add clone() for steps. - void updateInputStream(DataStream input_stream); - bool withTies() const { return with_ties; } private: + void updateOutputStream() override + { + output_stream = createOutputStream(input_streams.front(), input_streams.front().header, getDataStreamTraits()); + } + size_t limit; size_t offset; bool always_read_till_end; diff --git a/src/Processors/QueryPlan/MergingAggregatedStep.cpp b/src/Processors/QueryPlan/MergingAggregatedStep.cpp index 8dfb9f9c923..c898b901a6a 100644 --- a/src/Processors/QueryPlan/MergingAggregatedStep.cpp +++ b/src/Processors/QueryPlan/MergingAggregatedStep.cpp @@ -25,23 +25,26 @@ static ITransformingStep::Traits getTraits() MergingAggregatedStep::MergingAggregatedStep( const DataStream & input_stream_, - AggregatingTransformParamsPtr params_, + Aggregator::Params params_, + bool final_, bool memory_efficient_aggregation_, size_t max_threads_, size_t memory_efficient_merge_threads_) - : ITransformingStep(input_stream_, params_->getHeader(), getTraits()) - , params(params_) + : ITransformingStep(input_stream_, params_.getHeader(input_stream_.header, final_), getTraits()) + , params(std::move(params_)) + , final(final_) , memory_efficient_aggregation(memory_efficient_aggregation_) , max_threads(max_threads_) , memory_efficient_merge_threads(memory_efficient_merge_threads_) { /// Aggregation keys are distinct - for (auto key : params->params.keys) - output_stream->distinct_columns.insert(params->params.intermediate_header.getByPosition(key).name); + for (const auto & key : params.keys) + output_stream->distinct_columns.insert(key); } void MergingAggregatedStep::transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) { + auto transform_params = std::make_shared(pipeline.getHeader(), std::move(params), final); if (!memory_efficient_aggregation) { 
/// We union several sources into one, paralleling the work. @@ -49,9 +52,7 @@ void MergingAggregatedStep::transformPipeline(QueryPipelineBuilder & pipeline, c /// Now merge the aggregated blocks pipeline.addSimpleTransform([&](const Block & header) - { - return std::make_shared(header, params, max_threads); - }); + { return std::make_shared(header, transform_params, max_threads); }); } else { @@ -59,18 +60,28 @@ void MergingAggregatedStep::transformPipeline(QueryPipelineBuilder & pipeline, c ? static_cast(memory_efficient_merge_threads) : static_cast(max_threads); - pipeline.addMergingAggregatedMemoryEfficientTransform(params, num_merge_threads); + pipeline.addMergingAggregatedMemoryEfficientTransform(transform_params, num_merge_threads); } } void MergingAggregatedStep::describeActions(FormatSettings & settings) const { - return params->params.explain(settings.out, settings.offset); + return params.explain(settings.out, settings.offset); } void MergingAggregatedStep::describeActions(JSONBuilder::JSONMap & map) const { - params->params.explain(map); + params.explain(map); } +void MergingAggregatedStep::updateOutputStream() +{ + output_stream = createOutputStream(input_streams.front(), params.getHeader(input_streams.front().header, final), getDataStreamTraits()); + + /// Aggregation keys are distinct + for (const auto & key : params.keys) + output_stream->distinct_columns.insert(key); +} + + } diff --git a/src/Processors/QueryPlan/MergingAggregatedStep.h b/src/Processors/QueryPlan/MergingAggregatedStep.h index eeead41b5f9..136422c8c27 100644 --- a/src/Processors/QueryPlan/MergingAggregatedStep.h +++ b/src/Processors/QueryPlan/MergingAggregatedStep.h @@ -1,4 +1,5 @@ #pragma once +#include #include #include @@ -14,7 +15,8 @@ class MergingAggregatedStep : public ITransformingStep public: MergingAggregatedStep( const DataStream & input_stream_, - AggregatingTransformParamsPtr params_, + Aggregator::Params params_, + bool final_, bool memory_efficient_aggregation_, 
size_t max_threads_, size_t memory_efficient_merge_threads_); @@ -27,7 +29,10 @@ public: void describeActions(FormatSettings & settings) const override; private: - AggregatingTransformParamsPtr params; + void updateOutputStream() override; + + Aggregator::Params params; + bool final; bool memory_efficient_aggregation; size_t max_threads; size_t memory_efficient_merge_threads; diff --git a/src/Processors/QueryPlan/OffsetStep.h b/src/Processors/QueryPlan/OffsetStep.h index f16559bcfad..a32835b62a6 100644 --- a/src/Processors/QueryPlan/OffsetStep.h +++ b/src/Processors/QueryPlan/OffsetStep.h @@ -19,6 +19,11 @@ public: void describeActions(FormatSettings & settings) const override; private: + void updateOutputStream() override + { + output_stream = createOutputStream(input_streams.front(), input_streams.front().header, getDataStreamTraits()); + } + size_t offset; }; diff --git a/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp b/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp index 2625bf38bf7..0c17c27e7aa 100644 --- a/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp +++ b/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp @@ -27,10 +27,19 @@ namespace DB::ErrorCodes namespace DB::QueryPlanOptimizations { -static size_t tryAddNewFilterStep( - QueryPlan::Node * parent_node, - QueryPlan::Nodes & nodes, - const Names & allowed_inputs) +static bool filterColumnIsNotAmongAggregatesArguments(const AggregateDescriptions & aggregates, const std::string & filter_column_name) +{ + for (const auto & aggregate : aggregates) + { + const auto & argument_names = aggregate.argument_names; + if (std::find(argument_names.begin(), argument_names.end(), filter_column_name) != argument_names.end()) + return false; + } + return true; +} + +static size_t +tryAddNewFilterStep(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes, const Names & allowed_inputs, bool can_remove_filter = true) { QueryPlan::Node * child_node = parent_node->children.front(); @@ 
-62,11 +71,6 @@ static size_t tryAddNewFilterStep( /// Filter column was replaced to constant. const bool filter_is_constant = filter_node && filter_node->column && isColumnConst(*filter_node->column); - if (!filter_node || filter_is_constant) - /// This means that all predicates of filter were pushed down. - /// Replace current actions to expression, as we don't need to filter anything. - parent = std::make_unique(child->getOutputStream(), expression); - /// Add new Filter step before Aggregating. /// Expression/Filter -> Aggregating -> Something auto & node = nodes.emplace_back(); @@ -77,21 +81,31 @@ static size_t tryAddNewFilterStep( /// New filter column is the first one. auto split_filter_column_name = (*split_filter->getIndex().begin())->result_name; node.step = std::make_unique( - node.children.at(0)->step->getOutputStream(), std::move(split_filter), std::move(split_filter_column_name), true); + node.children.at(0)->step->getOutputStream(), std::move(split_filter), std::move(split_filter_column_name), can_remove_filter); + + if (auto * transforming_step = dynamic_cast(child.get())) + { + transforming_step->updateInputStream(node.step->getOutputStream()); + } + else + { + if (auto * join = typeid_cast(child.get())) + join->updateLeftStream(node.step->getOutputStream()); + else + throw Exception( + ErrorCodes::LOGICAL_ERROR, "We are trying to push down a filter through a step for which we cannot update input stream"); + } + + if (!filter_node || filter_is_constant) + /// This means that all predicates of filter were pushed down. + /// Replace current actions to expression, as we don't need to filter anything. 
+ parent = std::make_unique(child->getOutputStream(), expression); + else + filter->updateInputStream(child->getOutputStream()); return 3; } -static Names getAggregatingKeys(const Aggregator::Params & params) -{ - Names keys; - keys.reserve(params.keys.size()); - for (auto pos : params.keys) - keys.push_back(params.src_header.getByPosition(pos).name); - - return keys; -} - size_t tryPushDownFilter(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes) { if (parent_node->children.size() != 1) @@ -112,9 +126,14 @@ size_t tryPushDownFilter(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes if (auto * aggregating = typeid_cast(child.get())) { const auto & params = aggregating->getParams(); - Names keys = getAggregatingKeys(params); + const auto & keys = params.keys; - if (auto updated_steps = tryAddNewFilterStep(parent_node, nodes, keys)) + const bool filter_column_is_not_among_aggregation_keys + = std::find(keys.begin(), keys.end(), filter->getFilterColumnName()) == keys.end(); + const bool can_remove_filter = filter_column_is_not_among_aggregation_keys + && filterColumnIsNotAmongAggregatesArguments(params.aggregates, filter->getFilterColumnName()); + + if (auto updated_steps = tryAddNewFilterStep(parent_node, nodes, keys, can_remove_filter)) return updated_steps; } @@ -213,7 +232,9 @@ size_t tryPushDownFilter(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes allowed_keys.push_back(name); } - if (auto updated_steps = tryAddNewFilterStep(parent_node, nodes, allowed_keys)) + const bool can_remove_filter + = std::find(source_columns.begin(), source_columns.end(), filter->getFilterColumnName()) == source_columns.end(); + if (auto updated_steps = tryAddNewFilterStep(parent_node, nodes, allowed_keys, can_remove_filter)) return updated_steps; } } diff --git a/src/Processors/QueryPlan/Optimizations/liftUpArrayJoin.cpp b/src/Processors/QueryPlan/Optimizations/liftUpArrayJoin.cpp index e20c5f93d6e..36aab41df49 100644 --- 
a/src/Processors/QueryPlan/Optimizations/liftUpArrayJoin.cpp +++ b/src/Processors/QueryPlan/Optimizations/liftUpArrayJoin.cpp @@ -36,30 +36,6 @@ size_t tryLiftUpArrayJoin(QueryPlan::Node * parent_node, QueryPlan::Nodes & node auto description = parent->getStepDescription(); - /// All actions was moved before ARRAY JOIN. Swap Expression and ArrayJoin. - if (split_actions.second->trivial()) - { - auto expected_header = parent->getOutputStream().header; - - /// Expression/Filter -> ArrayJoin - std::swap(parent, child); - /// ArrayJoin -> Expression/Filter - - if (expression_step) - child = std::make_unique(child_node->children.at(0)->step->getOutputStream(), - std::move(split_actions.first)); - else - child = std::make_unique(child_node->children.at(0)->step->getOutputStream(), - std::move(split_actions.first), - filter_step->getFilterColumnName(), - filter_step->removesFilterColumn()); - - child->setStepDescription(std::move(description)); - - array_join_step->updateInputStream(child->getOutputStream(), expected_header); - return 2; - } - /// Add new expression step before ARRAY JOIN. 
/// Expression/Filter -> ArrayJoin -> Something auto & node = nodes.emplace_back(); @@ -70,7 +46,7 @@ size_t tryLiftUpArrayJoin(QueryPlan::Node * parent_node, QueryPlan::Nodes & node node.step = std::make_unique(node.children.at(0)->step->getOutputStream(), std::move(split_actions.first)); node.step->setStepDescription(description); - array_join_step->updateInputStream(node.step->getOutputStream(), {}); + array_join_step->updateInputStream(node.step->getOutputStream()); if (expression_step) parent = std::make_unique(array_join_step->getOutputStream(), split_actions.second); diff --git a/src/Processors/QueryPlan/Optimizations/liftUpFunctions.cpp b/src/Processors/QueryPlan/Optimizations/liftUpFunctions.cpp index 872adfeccc1..c3b03a5385f 100644 --- a/src/Processors/QueryPlan/Optimizations/liftUpFunctions.cpp +++ b/src/Processors/QueryPlan/Optimizations/liftUpFunctions.cpp @@ -70,8 +70,6 @@ size_t tryExecuteFunctionsAfterSorting(QueryPlan::Node * parent_node, QueryPlan: // so far the origin Expression (parent_node) -> Sorting (child_node) -> NeededCalculations (node_with_needed) sorting_step->updateInputStream(getChildOutputStream(*child_node)); - auto input_header = sorting_step->getInputStreams().at(0).header; - sorting_step->updateOutputStream(std::move(input_header)); auto description = parent_step->getStepDescription(); parent_step = std::make_unique(child_step->getOutputStream(), std::move(unneeded_for_sorting)); diff --git a/src/Processors/QueryPlan/RollupStep.cpp b/src/Processors/QueryPlan/RollupStep.cpp index 3b061f9c246..169976195ea 100644 --- a/src/Processors/QueryPlan/RollupStep.cpp +++ b/src/Processors/QueryPlan/RollupStep.cpp @@ -22,14 +22,15 @@ static ITransformingStep::Traits getTraits() }; } -RollupStep::RollupStep(const DataStream & input_stream_, AggregatingTransformParamsPtr params_) - : ITransformingStep(input_stream_, appendGroupingSetColumn(params_->getHeader()), getTraits()) +RollupStep::RollupStep(const DataStream & input_stream_, 
Aggregator::Params params_, bool final_) + : ITransformingStep(input_stream_, appendGroupingSetColumn(params_.getHeader(input_stream_.header, final_)), getTraits()) , params(std::move(params_)) - , keys_size(params->params.keys_size) + , keys_size(params.keys_size) + , final(final_) { /// Aggregation keys are distinct - for (auto key : params->params.keys) - output_stream->distinct_columns.insert(params->params.src_header.getByPosition(key).name); + for (const auto & key : params.keys) + output_stream->distinct_columns.insert(key); } ProcessorPtr addGroupingSetForTotals(const Block & header, const BuildQueryPipelineSettings & settings, UInt64 grouping_set_number); @@ -43,8 +44,20 @@ void RollupStep::transformPipeline(QueryPipelineBuilder & pipeline, const BuildQ if (stream_type == QueryPipelineBuilder::StreamType::Totals) return addGroupingSetForTotals(header, settings, keys_size); - return std::make_shared(header, std::move(params)); + auto transform_params = std::make_shared(header, std::move(params), true); + return std::make_shared(header, std::move(transform_params)); }); } +void RollupStep::updateOutputStream() +{ + output_stream = createOutputStream( + input_streams.front(), appendGroupingSetColumn(params.getHeader(input_streams.front().header, final)), getDataStreamTraits()); + + /// Aggregation keys are distinct + for (const auto & key : params.keys) + output_stream->distinct_columns.insert(key); +} + + } diff --git a/src/Processors/QueryPlan/RollupStep.h b/src/Processors/QueryPlan/RollupStep.h index 3dce6f74d9f..c59bf9f3ee9 100644 --- a/src/Processors/QueryPlan/RollupStep.h +++ b/src/Processors/QueryPlan/RollupStep.h @@ -1,4 +1,5 @@ #pragma once +#include #include #include @@ -12,15 +13,18 @@ using AggregatingTransformParamsPtr = std::shared_ptrsort_mode = DataStream::SortMode::Stream; } -void SortingStep::updateInputStream(DataStream input_stream) +void SortingStep::updateOutputStream() { - input_streams.clear(); - 
input_streams.emplace_back(std::move(input_stream)); -} - -void SortingStep::updateOutputStream(Block result_header) -{ - output_stream = createOutputStream(input_streams.at(0), std::move(result_header), getDataStreamTraits()); + output_stream = createOutputStream(input_streams.front(), input_streams.front().header, getDataStreamTraits()); output_stream->sort_description = result_description; output_stream->sort_mode = DataStream::SortMode::Stream; - updateDistinctColumns(output_stream->header, output_stream->distinct_columns); } void SortingStep::updateLimit(size_t limit_) diff --git a/src/Processors/QueryPlan/SortingStep.h b/src/Processors/QueryPlan/SortingStep.h index 23a31b14093..ce78bb863bf 100644 --- a/src/Processors/QueryPlan/SortingStep.h +++ b/src/Processors/QueryPlan/SortingStep.h @@ -49,14 +49,12 @@ public: /// Add limit or change it to lower value. void updateLimit(size_t limit_); - void updateInputStream(DataStream input_stream); - void updateOutputStream(Block result_header); - SortDescription getSortDescription() const { return result_description; } void convertToFinishSorting(SortDescription prefix_description); private: + void updateOutputStream() override; enum class Type { diff --git a/src/Processors/QueryPlan/TotalsHavingStep.cpp b/src/Processors/QueryPlan/TotalsHavingStep.cpp index 5e7e7011e0e..bb918a1a02d 100644 --- a/src/Processors/QueryPlan/TotalsHavingStep.cpp +++ b/src/Processors/QueryPlan/TotalsHavingStep.cpp @@ -27,7 +27,7 @@ static ITransformingStep::Traits getTraits(bool has_filter) TotalsHavingStep::TotalsHavingStep( const DataStream & input_stream_, - const ColumnsMask & aggregates_mask_, + const AggregateDescriptions & aggregates_, bool overflow_row_, const ActionsDAGPtr & actions_dag_, const std::string & filter_column_, @@ -36,16 +36,16 @@ TotalsHavingStep::TotalsHavingStep( double auto_include_threshold_, bool final_) : ITransformingStep( - input_stream_, - TotalsHavingTransform::transformHeader( - input_stream_.header, - 
actions_dag_.get(), - filter_column_, - remove_filter_, - final_, - aggregates_mask_), - getTraits(!filter_column_.empty())) - , aggregates_mask(aggregates_mask_) + input_stream_, + TotalsHavingTransform::transformHeader( + input_stream_.header, + actions_dag_.get(), + filter_column_, + remove_filter_, + final_, + getAggregatesMask(input_stream_.header, aggregates_)), + getTraits(!filter_column_.empty())) + , aggregates(aggregates_) , overflow_row(overflow_row_) , actions_dag(actions_dag_) , filter_column_name(filter_column_) @@ -62,7 +62,7 @@ void TotalsHavingStep::transformPipeline(QueryPipelineBuilder & pipeline, const auto totals_having = std::make_shared( pipeline.getHeader(), - aggregates_mask, + getAggregatesMask(pipeline.getHeader(), aggregates), overflow_row, expression_actions, filter_column_name, @@ -125,4 +125,19 @@ void TotalsHavingStep::describeActions(JSONBuilder::JSONMap & map) const } } +void TotalsHavingStep::updateOutputStream() +{ + output_stream = createOutputStream( + input_streams.front(), + TotalsHavingTransform::transformHeader( + input_streams.front().header, + actions_dag.get(), + filter_column_name, + remove_filter, + final, + getAggregatesMask(input_streams.front().header, aggregates)), + getDataStreamTraits()); +} + + } diff --git a/src/Processors/QueryPlan/TotalsHavingStep.h b/src/Processors/QueryPlan/TotalsHavingStep.h index 4ad741a1b44..b60eab78b53 100644 --- a/src/Processors/QueryPlan/TotalsHavingStep.h +++ b/src/Processors/QueryPlan/TotalsHavingStep.h @@ -15,15 +15,15 @@ class TotalsHavingStep : public ITransformingStep { public: TotalsHavingStep( - const DataStream & input_stream_, - const ColumnsMask & aggregates_mask_, - bool overflow_row_, - const ActionsDAGPtr & actions_dag_, - const std::string & filter_column_, - bool remove_filter_, - TotalsMode totals_mode_, - double auto_include_threshold_, - bool final_); + const DataStream & input_stream_, + const AggregateDescriptions & aggregates_, + bool overflow_row_, + const 
ActionsDAGPtr & actions_dag_, + const std::string & filter_column_, + bool remove_filter_, + TotalsMode totals_mode_, + double auto_include_threshold_, + bool final_); String getName() const override { return "TotalsHaving"; } @@ -35,7 +35,10 @@ public: const ActionsDAGPtr & getActions() const { return actions_dag; } private: - const ColumnsMask aggregates_mask; + void updateOutputStream() override; + + const AggregateDescriptions aggregates; + bool overflow_row; ActionsDAGPtr actions_dag; String filter_column_name; diff --git a/src/Processors/QueryPlan/WindowStep.cpp b/src/Processors/QueryPlan/WindowStep.cpp index 48d16ed321f..b67b394b57b 100644 --- a/src/Processors/QueryPlan/WindowStep.cpp +++ b/src/Processors/QueryPlan/WindowStep.cpp @@ -44,17 +44,13 @@ static Block addWindowFunctionResultColumns(const Block & block, return result; } -WindowStep::WindowStep(const DataStream & input_stream_, - const WindowDescription & window_description_, - const std::vector & window_functions_) - : ITransformingStep( - input_stream_, - addWindowFunctionResultColumns(input_stream_.header, - window_functions_), - getTraits()) +WindowStep::WindowStep( + const DataStream & input_stream_, + const WindowDescription & window_description_, + const std::vector & window_functions_) + : ITransformingStep(input_stream_, addWindowFunctionResultColumns(input_stream_.header, window_functions_), getTraits()) , window_description(window_description_) , window_functions(window_functions_) - , input_header(input_stream_.header) { // We don't remove any columns, only add, so probably we don't have to update // the output DataStream::distinct_columns. @@ -70,11 +66,12 @@ void WindowStep::transformPipeline(QueryPipelineBuilder & pipeline, const BuildQ // have resized it. 
pipeline.resize(1); - pipeline.addSimpleTransform([&](const Block & /*header*/) - { - return std::make_shared(input_header, - output_stream->header, window_description, window_functions); - }); + pipeline.addSimpleTransform( + [&](const Block & /*header*/) + { + return std::make_shared( + input_streams.front().header, output_stream->header, window_description, window_functions); + }); assertBlocksHaveEqualStructure(pipeline.getHeader(), output_stream->header, "WindowStep transform for '" + window_description.window_name + "'"); @@ -138,6 +135,14 @@ void WindowStep::describeActions(JSONBuilder::JSONMap & map) const map.add("Functions", std::move(functions_array)); } +void WindowStep::updateOutputStream() +{ + output_stream = createOutputStream( + input_streams.front(), addWindowFunctionResultColumns(input_streams.front().header, window_functions), getDataStreamTraits()); + + window_description.checkValid(); +} + const WindowDescription & WindowStep::getWindowDescription() const { return window_description; diff --git a/src/Processors/QueryPlan/WindowStep.h b/src/Processors/QueryPlan/WindowStep.h index 9b58cceb972..0bafdcbe414 100644 --- a/src/Processors/QueryPlan/WindowStep.h +++ b/src/Processors/QueryPlan/WindowStep.h @@ -28,9 +28,10 @@ public: const WindowDescription & getWindowDescription() const; private: + void updateOutputStream() override; + WindowDescription window_description; std::vector window_functions; - Block input_header; }; } diff --git a/src/Processors/TTL/TTLAggregationAlgorithm.cpp b/src/Processors/TTL/TTLAggregationAlgorithm.cpp index bce54ace054..d8b022f0acb 100644 --- a/src/Processors/TTL/TTLAggregationAlgorithm.cpp +++ b/src/Processors/TTL/TTLAggregationAlgorithm.cpp @@ -15,28 +15,31 @@ TTLAggregationAlgorithm::TTLAggregationAlgorithm( { current_key_value.resize(description.group_by_keys.size()); - ColumnNumbers keys; - for (const auto & key : description.group_by_keys) - keys.push_back(header.getPositionByName(key)); + const auto & keys = 
description.group_by_keys; key_columns.resize(description.group_by_keys.size()); AggregateDescriptions aggregates = description.aggregate_descriptions; - for (auto & descr : aggregates) - if (descr.arguments.empty()) - for (const auto & name : descr.argument_names) - descr.arguments.push_back(header.getPositionByName(name)); - columns_for_aggregator.resize(description.aggregate_descriptions.size()); const Settings & settings = storage_.getContext()->getSettingsRef(); - Aggregator::Params params(header, keys, aggregates, - false, settings.max_rows_to_group_by, settings.group_by_overflow_mode, 0, 0, - settings.max_bytes_before_external_group_by, settings.empty_result_for_aggregation_by_empty_set, - storage_.getContext()->getTemporaryVolume(), settings.max_threads, settings.min_free_disk_space_for_temporary_data, - settings.compile_aggregate_expressions, settings.min_count_to_compile_aggregate_expression); + Aggregator::Params params( + keys, + aggregates, + false, + settings.max_rows_to_group_by, + settings.group_by_overflow_mode, + 0, + 0, + settings.max_bytes_before_external_group_by, + settings.empty_result_for_aggregation_by_empty_set, + storage_.getContext()->getTemporaryVolume(), + settings.max_threads, + settings.min_free_disk_space_for_temporary_data, + settings.compile_aggregate_expressions, + settings.min_count_to_compile_aggregate_expression); - aggregator = std::make_unique(params); + aggregator = std::make_unique(header, params); if (isMaxTTLExpired()) new_ttl_info.ttl_finished = true; diff --git a/src/Processors/Transforms/AggregatingInOrderTransform.cpp b/src/Processors/Transforms/AggregatingInOrderTransform.cpp index f435d46a066..ce50ae5eeee 100644 --- a/src/Processors/Transforms/AggregatingInOrderTransform.cpp +++ b/src/Processors/Transforms/AggregatingInOrderTransform.cpp @@ -87,7 +87,8 @@ void AggregatingInOrderTransform::consume(Chunk chunk) Columns key_columns(params->params.keys_size); for (size_t i = 0; i < params->params.keys_size; ++i) { - 
materialized_columns.push_back(columns.at(params->params.keys[i])->convertToFullColumnIfConst()); + const auto pos = inputs.front().getHeader().getPositionByName(params->params.keys[i]); + materialized_columns.push_back(chunk.getColumns().at(pos)->convertToFullColumnIfConst()); key_columns[i] = materialized_columns.back(); if (group_by_key) key_columns_raw[i] = materialized_columns.back().get(); @@ -95,7 +96,11 @@ void AggregatingInOrderTransform::consume(Chunk chunk) Aggregator::NestedColumnsHolder nested_columns_holder; Aggregator::AggregateFunctionInstructions aggregate_function_instructions; - params->aggregator.prepareAggregateInstructions(columns, aggregate_columns, materialized_columns, aggregate_function_instructions, nested_columns_holder); + if (!params->params.only_merge) + { + params->aggregator.prepareAggregateInstructions( + columns, aggregate_columns, materialized_columns, aggregate_function_instructions, nested_columns_holder); + } size_t key_end = 0; size_t key_begin = 0; @@ -123,7 +128,7 @@ void AggregatingInOrderTransform::consume(Chunk chunk) Int64 current_memory_usage = 0; Aggregator::AggregateColumnsConstData aggregate_columns_data(params->params.aggregates_size); - if (params->only_merge) + if (params->params.only_merge) { for (size_t i = 0, j = 0; i < columns.size(); ++i) { @@ -149,7 +154,7 @@ void AggregatingInOrderTransform::consume(Chunk chunk) /// Add data to aggr. state if interval is not empty. Empty when haven't found current key in new block. 
if (key_begin != key_end) { - if (params->only_merge) + if (params->params.only_merge) { if (group_by_key) params->aggregator.mergeOnBlockSmall(variants, key_begin, key_end, aggregate_columns_data, key_columns_raw); diff --git a/src/Processors/Transforms/AggregatingTransform.cpp b/src/Processors/Transforms/AggregatingTransform.cpp index b5b254c3e3c..f8332742978 100644 --- a/src/Processors/Transforms/AggregatingTransform.cpp +++ b/src/Processors/Transforms/AggregatingTransform.cpp @@ -524,7 +524,7 @@ void AggregatingTransform::consume(Chunk chunk) src_rows += num_rows; src_bytes += chunk.bytes(); - if (params->only_merge) + if (params->params.only_merge) { auto block = getInputs().front().getHeader().cloneWithColumns(chunk.detachColumns()); block = materializeBlock(block); @@ -549,7 +549,7 @@ void AggregatingTransform::initGenerate() /// To do this, we pass a block with zero rows to aggregate. if (variants.empty() && params->params.keys_size == 0 && !params->params.empty_result_for_aggregation_by_empty_set) { - if (params->only_merge) + if (params->params.only_merge) params->aggregator.mergeOnBlock(getInputs().front().getHeader(), variants, no_more_keys); else params->aggregator.executeOnBlock(getInputs().front().getHeader(), variants, key_columns, aggregate_columns, no_more_keys); diff --git a/src/Processors/Transforms/AggregatingTransform.h b/src/Processors/Transforms/AggregatingTransform.h index 8d62664da59..789fa970ebd 100644 --- a/src/Processors/Transforms/AggregatingTransform.h +++ b/src/Processors/Transforms/AggregatingTransform.h @@ -34,24 +34,21 @@ struct AggregatingTransformParams AggregatorListPtr aggregator_list_ptr; Aggregator & aggregator; bool final; - /// Merge data for aggregate projections. 
- bool only_merge = false; - AggregatingTransformParams(const Aggregator::Params & params_, bool final_, bool only_merge_) + AggregatingTransformParams(const Block & header, const Aggregator::Params & params_, bool final_) : params(params_) , aggregator_list_ptr(std::make_shared()) - , aggregator(*aggregator_list_ptr->emplace(aggregator_list_ptr->end(), params)) + , aggregator(*aggregator_list_ptr->emplace(aggregator_list_ptr->end(), header, params)) , final(final_) - , only_merge(only_merge_) { } - AggregatingTransformParams(const Aggregator::Params & params_, const AggregatorListPtr & aggregator_list_ptr_, bool final_, bool only_merge_) + AggregatingTransformParams( + const Block & header, const Aggregator::Params & params_, const AggregatorListPtr & aggregator_list_ptr_, bool final_) : params(params_) , aggregator_list_ptr(aggregator_list_ptr_) - , aggregator(*aggregator_list_ptr->emplace(aggregator_list_ptr->end(), params)) + , aggregator(*aggregator_list_ptr->emplace(aggregator_list_ptr->end(), header, params)) , final(final_) - , only_merge(only_merge_) { } diff --git a/src/Processors/Transforms/CubeTransform.cpp b/src/Processors/Transforms/CubeTransform.cpp index 83ed346dabe..b80ca29327f 100644 --- a/src/Processors/Transforms/CubeTransform.cpp +++ b/src/Processors/Transforms/CubeTransform.cpp @@ -12,9 +12,12 @@ namespace ErrorCodes CubeTransform::CubeTransform(Block header, AggregatingTransformParamsPtr params_) : IAccumulatingTransform(std::move(header), appendGroupingSetColumn(params_->getHeader())) , params(std::move(params_)) - , keys(params->params.keys) , aggregates_mask(getAggregatesMask(params->getHeader(), params->params.aggregates)) { + keys.reserve(params->params.keys_size); + for (const auto & key : params->params.keys) + keys.emplace_back(input.getHeader().getPositionByName(key)); + if (keys.size() >= 8 * sizeof(mask)) throw Exception("Too many keys are used for CubeTransform.", ErrorCodes::LOGICAL_ERROR); } diff --git 
a/src/Processors/Transforms/CubeTransform.h b/src/Processors/Transforms/CubeTransform.h index 4575a01935d..bd33eabd750 100644 --- a/src/Processors/Transforms/CubeTransform.h +++ b/src/Processors/Transforms/CubeTransform.h @@ -21,7 +21,7 @@ protected: private: AggregatingTransformParamsPtr params; - const ColumnNumbers keys; + ColumnNumbers keys; const ColumnsMask aggregates_mask; Chunks consumed_chunks; diff --git a/src/Processors/Transforms/RollupTransform.cpp b/src/Processors/Transforms/RollupTransform.cpp index b69a691323c..e5351d1d5e2 100644 --- a/src/Processors/Transforms/RollupTransform.cpp +++ b/src/Processors/Transforms/RollupTransform.cpp @@ -8,9 +8,11 @@ namespace DB RollupTransform::RollupTransform(Block header, AggregatingTransformParamsPtr params_) : IAccumulatingTransform(std::move(header), appendGroupingSetColumn(params_->getHeader())) , params(std::move(params_)) - , keys(params->params.keys) , aggregates_mask(getAggregatesMask(params->getHeader(), params->params.aggregates)) { + keys.reserve(params->params.keys_size); + for (const auto & key : params->params.keys) + keys.emplace_back(input.getHeader().getPositionByName(key)); } void RollupTransform::consume(Chunk chunk) diff --git a/src/Processors/Transforms/RollupTransform.h b/src/Processors/Transforms/RollupTransform.h index 8fd27e3e6a2..1630df23579 100644 --- a/src/Processors/Transforms/RollupTransform.h +++ b/src/Processors/Transforms/RollupTransform.h @@ -20,7 +20,7 @@ protected: private: AggregatingTransformParamsPtr params; - const ColumnNumbers keys; + ColumnNumbers keys; const ColumnsMask aggregates_mask; Chunks consumed_chunks; diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index 30fb3efcf0e..6590445572d 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -263,23 +263,22 @@ QueryPlanPtr MergeTreeDataSelectExecutor::read( Pipe 
ordinary_pipe; if (query_info.projection->desc->type == ProjectionDescription::Type::Aggregate) { - auto make_aggregator_params = [&](const Block & header_before_aggregation, bool projection) + auto make_aggregator_params = [&](bool projection) { - ColumnNumbers keys; - for (const auto & key : query_info.projection->aggregation_keys) - keys.push_back(header_before_aggregation.getPositionByName(key.name)); + const auto & keys = query_info.projection->aggregation_keys.getNames(); AggregateDescriptions aggregates = query_info.projection->aggregate_descriptions; - if (!projection) - { - for (auto & descr : aggregates) - if (descr.arguments.empty()) - for (const auto & name : descr.argument_names) - descr.arguments.push_back(header_before_aggregation.getPositionByName(name)); - } + + /// This part is hacky. + /// We want AggregatingTransform to work with aggregate states instead of normal columns. + /// It is almost the same, just instead of adding new data to aggregation state we merge it with existing. + /// + /// It is needed because data in projection: + /// * is not merged completely (we may have states with the same key in different parts) + /// * is not split into buckets (so if we just use MergingAggregated, it will use single thread) + const bool only_merge = projection; Aggregator::Params params( - header_before_aggregation, keys, aggregates, query_info.projection->aggregate_overflow_row, @@ -293,23 +292,8 @@ QueryPlanPtr MergeTreeDataSelectExecutor::read( settings.max_threads, settings.min_free_disk_space_for_temporary_data, settings.compile_aggregate_expressions, - settings.min_count_to_compile_aggregate_expression); - - bool only_merge = false; - if (projection) - { - /// The source header is also an intermediate header - params.intermediate_header = header_before_aggregation; - - /// This part is hacky. - /// We want AggregatingTransform to work with aggregate states instead of normal columns. 
- /// It is almost the same, just instead of adding new data to aggregation state we merge it with existing. - /// - /// It is needed because data in projection: - /// * is not merged completely (we may have states with the same key in different parts) - /// * is not split into buckets (so if we just use MergingAggregated, it will use single thread) - only_merge = true; - } + settings.min_count_to_compile_aggregate_expression, + only_merge); return std::make_pair(params, only_merge); }; @@ -343,10 +327,10 @@ QueryPlanPtr MergeTreeDataSelectExecutor::read( /// TODO apply optimize_aggregation_in_order here too (like below) auto build_aggregate_pipe = [&](Pipe & pipe, bool projection) { - auto [params, only_merge] = make_aggregator_params(pipe.getHeader(), projection); + auto [params, only_merge] = make_aggregator_params(projection); AggregatingTransformParamsPtr transform_params = std::make_shared( - std::move(params), aggregator_list_ptr, query_info.projection->aggregate_final, only_merge); + pipe.getHeader(), std::move(params), aggregator_list_ptr, query_info.projection->aggregate_final); pipe.resize(pipe.numOutputPorts(), true, true); @@ -371,7 +355,7 @@ QueryPlanPtr MergeTreeDataSelectExecutor::read( { auto add_aggregating_step = [&](QueryPlanPtr & query_plan, bool projection) { - auto [params, only_merge] = make_aggregator_params(query_plan->getCurrentDataStream().header, projection); + auto [params, only_merge] = make_aggregator_params(projection); auto merge_threads = num_streams; auto temporary_data_merge_threads = settings.aggregation_memory_efficient_merge_threads @@ -390,7 +374,6 @@ QueryPlanPtr MergeTreeDataSelectExecutor::read( std::move(params), /* grouping_sets_params_= */ GroupingSetsParamsList{}, query_info.projection->aggregate_final, - only_merge, settings.max_block_size, settings.aggregation_in_order_max_block_bytes, merge_threads, diff --git a/tests/queries/0_stateless/01763_filter_push_down_bugs.reference 
b/tests/queries/0_stateless/01763_filter_push_down_bugs.reference index 6917117b3e2..5aa2e645509 100644 --- a/tests/queries/0_stateless/01763_filter_push_down_bugs.reference +++ b/tests/queries/0_stateless/01763_filter_push_down_bugs.reference @@ -5,3 +5,4 @@ String1_0 String2_0 String3_0 String4_0 1 String1_0 String2_0 String3_0 String4_0 1 1 [0,1,2] +1 diff --git a/tests/queries/0_stateless/01763_filter_push_down_bugs.sql b/tests/queries/0_stateless/01763_filter_push_down_bugs.sql index b13282e6dca..1058bf75144 100644 --- a/tests/queries/0_stateless/01763_filter_push_down_bugs.sql +++ b/tests/queries/0_stateless/01763_filter_push_down_bugs.sql @@ -37,3 +37,7 @@ WHERE String4 ='String4_0'; DROP TABLE IF EXISTS Test; select x, y from (select [0, 1, 2] as y, 1 as a, 2 as b) array join y as x where a = 1 and b = 2 and (x = 1 or x != 1) and x = 1; + +create table t(a UInt8) engine=MergeTree order by a; +insert into t select * from numbers(2); +select a from t t1 join t t2 on t1.a = t2.a where t1.a; diff --git a/tests/queries/0_stateless/01823_explain_json.reference b/tests/queries/0_stateless/01823_explain_json.reference index 9e36660204b..9df7c16e4f4 100644 --- a/tests/queries/0_stateless/01823_explain_json.reference +++ b/tests/queries/0_stateless/01823_explain_json.reference @@ -63,8 +63,7 @@ "Argument Types": ["UInt64"], "Result Type": "Float64" }, - "Arguments": ["number"], - "Argument Positions": [0] + "Arguments": ["number"] }, { "Name": "sumIf(number, greater(number, 0))", @@ -73,8 +72,7 @@ "Argument Types": ["UInt64", "UInt8"], "Result Type": "UInt64" }, - "Arguments": ["number", "greater(number, 0)"], - "Argument Positions": [0, 2] + "Arguments": ["number", "greater(number, 0)"] } ], -------- diff --git a/tests/queries/0_stateless/01823_explain_json.sh b/tests/queries/0_stateless/01823_explain_json.sh index 3db2dcb6dc4..4d7aa5f88d6 100755 --- a/tests/queries/0_stateless/01823_explain_json.sh +++ b/tests/queries/0_stateless/01823_explain_json.sh @@ -12,7 
+12,7 @@ $CLICKHOUSE_CLIENT -q "explain json = 1, description = 0, header = 1 select 1, 2 echo "--------" $CLICKHOUSE_CLIENT -q "EXPLAIN json = 1, actions = 1, header = 1, description = 0 SELECT quantile(0.2)(number), sumIf(number, number > 0) from numbers(2) group by number, number + 1 FORMAT TSVRaw - " | grep Aggregating -A 42 + " | grep Aggregating -A 40 echo "--------" $CLICKHOUSE_CLIENT -q "EXPLAIN json = 1, actions = 1, description = 0