From 3b716b07bad7cd0457008716980a7f68d229cda0 Mon Sep 17 00:00:00 2001 From: Ivan Lezhankin Date: Thu, 2 Apr 2020 12:10:09 +0300 Subject: [PATCH 1/6] Fix broken links --- tests/decimals_dictionary.xml | 2 +- tests/instructions/developer_instruction_en.md | 2 +- tests/instructions/developer_instruction_ru.md | 2 +- tests/ints_dictionary.xml | 2 +- tests/strings_dictionary.xml | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/decimals_dictionary.xml b/tests/decimals_dictionary.xml index a6661ee64e5..15f5b3800b3 120000 --- a/tests/decimals_dictionary.xml +++ b/tests/decimals_dictionary.xml @@ -1 +1 @@ -../../dbms/tests/config/decimals_dictionary.xml \ No newline at end of file +config/decimals_dictionary.xml \ No newline at end of file diff --git a/tests/instructions/developer_instruction_en.md b/tests/instructions/developer_instruction_en.md index 81ffff89507..7ce5ac9b690 120000 --- a/tests/instructions/developer_instruction_en.md +++ b/tests/instructions/developer_instruction_en.md @@ -1 +1 @@ -../../../docs/en/development/developer_instruction.md \ No newline at end of file +../../docs/en/development/developer_instruction.md \ No newline at end of file diff --git a/tests/instructions/developer_instruction_ru.md b/tests/instructions/developer_instruction_ru.md index 9f912ebfec9..3beb31f0d28 120000 --- a/tests/instructions/developer_instruction_ru.md +++ b/tests/instructions/developer_instruction_ru.md @@ -1 +1 @@ -../../../docs/ru/development/developer_instruction.md \ No newline at end of file +../../docs/ru/development/developer_instruction.md \ No newline at end of file diff --git a/tests/ints_dictionary.xml b/tests/ints_dictionary.xml index bdbf0690125..1daa76b8267 120000 --- a/tests/ints_dictionary.xml +++ b/tests/ints_dictionary.xml @@ -1 +1 @@ -../../dbms/tests/config/ints_dictionary.xml \ No newline at end of file +config/ints_dictionary.xml \ No newline at end of file diff --git a/tests/strings_dictionary.xml 
b/tests/strings_dictionary.xml index 603d99ef4e8..be66c1da224 120000 --- a/tests/strings_dictionary.xml +++ b/tests/strings_dictionary.xml @@ -1 +1 @@ -../../dbms/tests/config/strings_dictionary.xml \ No newline at end of file +config/strings_dictionary.xml \ No newline at end of file From 266cfc52f76b9fd98206b330e2c1b82fbf39cb1c Mon Sep 17 00:00:00 2001 From: Ivan Lezhankin Date: Thu, 2 Apr 2020 14:44:09 +0300 Subject: [PATCH 2/6] Fix splitted build on master --- dbms/Functions/FunctionsStringSearch.cpp | 709 ----------------------- 1 file changed, 709 deletions(-) delete mode 100644 dbms/Functions/FunctionsStringSearch.cpp diff --git a/dbms/Functions/FunctionsStringSearch.cpp b/dbms/Functions/FunctionsStringSearch.cpp deleted file mode 100644 index a7e4cf4f8b1..00000000000 --- a/dbms/Functions/FunctionsStringSearch.cpp +++ /dev/null @@ -1,709 +0,0 @@ -#include "FunctionsStringSearch.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace DB -{ -namespace ErrorCodes -{ - extern const int ILLEGAL_COLUMN; -} -/** Implementation details for functions of 'position' family depending on ASCII/UTF8 and case sensitiveness. - */ -struct PositionCaseSensitiveASCII -{ - /// For searching single substring inside big-enough contiguous chunk of data. Coluld have slightly expensive initialization. - using SearcherInBigHaystack = Volnitsky; - - /// For search many substrings in one string - using MultiSearcherInBigHaystack = MultiVolnitsky; - - /// For searching single substring, that is different each time. This object is created for each row of data. It must have cheap initialization. 
- using SearcherInSmallHaystack = LibCASCIICaseSensitiveStringSearcher; - - static SearcherInBigHaystack createSearcherInBigHaystack(const char * needle_data, size_t needle_size, size_t haystack_size_hint) - { - return SearcherInBigHaystack(needle_data, needle_size, haystack_size_hint); - } - - static SearcherInSmallHaystack createSearcherInSmallHaystack(const char * needle_data, size_t needle_size) - { - return SearcherInSmallHaystack(needle_data, needle_size); - } - - static MultiSearcherInBigHaystack createMultiSearcherInBigHaystack(const std::vector & needles) - { - return MultiSearcherInBigHaystack(needles); - } - - /// Number of code points between 'begin' and 'end' (this has different behaviour for ASCII and UTF-8). - static size_t countChars(const char * begin, const char * end) { return end - begin; } - - /// Convert string to lowercase. Only for case-insensitive search. - /// Implementation is permitted to be inefficient because it is called for single string. - static void toLowerIfNeed(std::string &) { } -}; - -struct PositionCaseInsensitiveASCII -{ - /// `Volnitsky` is not used here, because one person has measured that this is better. It will be good if you question it. 
- using SearcherInBigHaystack = ASCIICaseInsensitiveStringSearcher; - using MultiSearcherInBigHaystack = MultiVolnitskyCaseInsensitive; - using SearcherInSmallHaystack = LibCASCIICaseInsensitiveStringSearcher; - - static SearcherInBigHaystack createSearcherInBigHaystack(const char * needle_data, size_t needle_size, size_t /*haystack_size_hint*/) - { - return SearcherInBigHaystack(needle_data, needle_size); - } - - static SearcherInSmallHaystack createSearcherInSmallHaystack(const char * needle_data, size_t needle_size) - { - return SearcherInSmallHaystack(needle_data, needle_size); - } - - static MultiSearcherInBigHaystack createMultiSearcherInBigHaystack(const std::vector & needles) - { - return MultiSearcherInBigHaystack(needles); - } - - static size_t countChars(const char * begin, const char * end) { return end - begin; } - - static void toLowerIfNeed(std::string & s) { std::transform(std::begin(s), std::end(s), std::begin(s), tolower); } -}; - -struct PositionCaseSensitiveUTF8 -{ - using SearcherInBigHaystack = VolnitskyUTF8; - using MultiSearcherInBigHaystack = MultiVolnitskyUTF8; - using SearcherInSmallHaystack = LibCASCIICaseSensitiveStringSearcher; - - static SearcherInBigHaystack createSearcherInBigHaystack(const char * needle_data, size_t needle_size, size_t haystack_size_hint) - { - return SearcherInBigHaystack(needle_data, needle_size, haystack_size_hint); - } - - static SearcherInSmallHaystack createSearcherInSmallHaystack(const char * needle_data, size_t needle_size) - { - return SearcherInSmallHaystack(needle_data, needle_size); - } - - static MultiSearcherInBigHaystack createMultiSearcherInBigHaystack(const std::vector & needles) - { - return MultiSearcherInBigHaystack(needles); - } - - static size_t countChars(const char * begin, const char * end) - { - size_t res = 0; - for (auto it = begin; it != end; ++it) - if (!UTF8::isContinuationOctet(static_cast(*it))) - ++res; - return res; - } - - static void toLowerIfNeed(std::string &) { } -}; - 
-struct PositionCaseInsensitiveUTF8 -{ - using SearcherInBigHaystack = VolnitskyCaseInsensitiveUTF8; - using MultiSearcherInBigHaystack = MultiVolnitskyCaseInsensitiveUTF8; - using SearcherInSmallHaystack = UTF8CaseInsensitiveStringSearcher; /// TODO Very suboptimal. - - static SearcherInBigHaystack createSearcherInBigHaystack(const char * needle_data, size_t needle_size, size_t haystack_size_hint) - { - return SearcherInBigHaystack(needle_data, needle_size, haystack_size_hint); - } - - static SearcherInSmallHaystack createSearcherInSmallHaystack(const char * needle_data, size_t needle_size) - { - return SearcherInSmallHaystack(needle_data, needle_size); - } - - static MultiSearcherInBigHaystack createMultiSearcherInBigHaystack(const std::vector & needles) - { - return MultiSearcherInBigHaystack(needles); - } - - static size_t countChars(const char * begin, const char * end) - { - size_t res = 0; - for (auto it = begin; it != end; ++it) - if (!UTF8::isContinuationOctet(static_cast(*it))) - ++res; - return res; - } - - static void toLowerIfNeed(std::string & s) { Poco::UTF8::toLowerInPlace(s); } -}; - -template -struct PositionImpl -{ - static constexpr bool use_default_implementation_for_constants = false; - - using ResultType = UInt64; - - /// Find one substring in many strings. - static void vectorConstant( - const ColumnString::Chars & data, const ColumnString::Offsets & offsets, const std::string & needle, PaddedPODArray & res) - { - const UInt8 * begin = data.data(); - const UInt8 * pos = begin; - const UInt8 * end = pos + data.size(); - - /// Current index in the array of strings. - size_t i = 0; - - typename Impl::SearcherInBigHaystack searcher = Impl::createSearcherInBigHaystack(needle.data(), needle.size(), end - pos); - - /// We will search for the next occurrence in all strings at once. - while (pos < end && end != (pos = searcher.search(pos, end - pos))) - { - /// Determine which index it refers to. 
- while (begin + offsets[i] <= pos) - { - res[i] = 0; - ++i; - } - - /// We check that the entry does not pass through the boundaries of strings. - if (pos + needle.size() < begin + offsets[i]) - res[i] = 1 + Impl::countChars(reinterpret_cast(begin + offsets[i - 1]), reinterpret_cast(pos)); - else - res[i] = 0; - - pos = begin + offsets[i]; - ++i; - } - - if (i < res.size()) - memset(&res[i], 0, (res.size() - i) * sizeof(res[0])); - } - - /// Search for substring in string. - static void constantConstant(std::string data, std::string needle, UInt64 & res) - { - Impl::toLowerIfNeed(data); - Impl::toLowerIfNeed(needle); - - res = data.find(needle); - if (res == std::string::npos) - res = 0; - else - res = 1 + Impl::countChars(data.data(), data.data() + res); - } - - /// Search each time for a different single substring inside each time different string. - static void vectorVector( - const ColumnString::Chars & haystack_data, - const ColumnString::Offsets & haystack_offsets, - const ColumnString::Chars & needle_data, - const ColumnString::Offsets & needle_offsets, - PaddedPODArray & res) - { - ColumnString::Offset prev_haystack_offset = 0; - ColumnString::Offset prev_needle_offset = 0; - - size_t size = haystack_offsets.size(); - - for (size_t i = 0; i < size; ++i) - { - size_t needle_size = needle_offsets[i] - prev_needle_offset - 1; - size_t haystack_size = haystack_offsets[i] - prev_haystack_offset - 1; - - if (0 == needle_size) - { - /// An empty string is always at the very beginning of `haystack`. - res[i] = 1; - } - else - { - /// It is assumed that the StringSearcher is not very difficult to initialize. - typename Impl::SearcherInSmallHaystack searcher = Impl::createSearcherInSmallHaystack( - reinterpret_cast(&needle_data[prev_needle_offset]), - needle_offsets[i] - prev_needle_offset - 1); /// zero byte at the end - - /// searcher returns a pointer to the found substring or to the end of `haystack`. 
- size_t pos = searcher.search(&haystack_data[prev_haystack_offset], &haystack_data[haystack_offsets[i] - 1]) - - &haystack_data[prev_haystack_offset]; - - if (pos != haystack_size) - { - res[i] = 1 - + Impl::countChars( - reinterpret_cast(&haystack_data[prev_haystack_offset]), - reinterpret_cast(&haystack_data[prev_haystack_offset + pos])); - } - else - res[i] = 0; - } - - prev_haystack_offset = haystack_offsets[i]; - prev_needle_offset = needle_offsets[i]; - } - } - - /// Find many substrings in single string. - static void constantVector( - const String & haystack, - const ColumnString::Chars & needle_data, - const ColumnString::Offsets & needle_offsets, - PaddedPODArray & res) - { - // NOTE You could use haystack indexing. But this is a rare case. - - ColumnString::Offset prev_needle_offset = 0; - - size_t size = needle_offsets.size(); - - for (size_t i = 0; i < size; ++i) - { - size_t needle_size = needle_offsets[i] - prev_needle_offset - 1; - - if (0 == needle_size) - { - res[i] = 1; - } - else - { - typename Impl::SearcherInSmallHaystack searcher = Impl::createSearcherInSmallHaystack( - reinterpret_cast(&needle_data[prev_needle_offset]), needle_offsets[i] - prev_needle_offset - 1); - - size_t pos = searcher.search( - reinterpret_cast(haystack.data()), - reinterpret_cast(haystack.data()) + haystack.size()) - - reinterpret_cast(haystack.data()); - - if (pos != haystack.size()) - { - res[i] = 1 + Impl::countChars(haystack.data(), haystack.data() + pos); - } - else - res[i] = 0; - } - - prev_needle_offset = needle_offsets[i]; - } - } - - template - static void vectorFixedConstant(Args &&...) 
- { - throw Exception("Functions 'position' don't support FixedString haystack argument", ErrorCodes::ILLEGAL_COLUMN); - } -}; - -template -struct MultiSearchAllPositionsImpl -{ - using ResultType = UInt64; - - static void vectorConstant( - const ColumnString::Chars & haystack_data, - const ColumnString::Offsets & haystack_offsets, - const std::vector & needles, - PaddedPODArray & res) - { - auto res_callback = [](const UInt8 * start, const UInt8 * end) -> UInt64 - { - return 1 + Impl::countChars(reinterpret_cast(start), reinterpret_cast(end)); - }; - - auto searcher = Impl::createMultiSearcherInBigHaystack(needles); - - const size_t haystack_string_size = haystack_offsets.size(); - const size_t needles_size = needles.size(); - - /// Something can be uninitialized after the search itself - std::fill(res.begin(), res.end(), 0); - - while (searcher.hasMoreToSearch()) - { - size_t prev_offset = 0; - for (size_t j = 0, from = 0; j < haystack_string_size; ++j, from += needles_size) - { - const auto * haystack = &haystack_data[prev_offset]; - const auto * haystack_end = haystack + haystack_offsets[j] - prev_offset - 1; - searcher.searchOneAll(haystack, haystack_end, res.data() + from, res_callback); - prev_offset = haystack_offsets[j]; - } - } - } -}; - -template -struct MultiSearchImpl -{ - using ResultType = UInt8; - static constexpr bool is_using_hyperscan = false; - /// Variable for understanding, if we used offsets for the output, most - /// likely to determine whether the function returns ColumnVector of ColumnArray. 
- static constexpr bool is_column_array = false; - static auto getReturnType() { return std::make_shared>(); } - - static void vectorConstant( - const ColumnString::Chars & haystack_data, - const ColumnString::Offsets & haystack_offsets, - const std::vector & needles, - PaddedPODArray & res, - [[maybe_unused]] PaddedPODArray & offsets) - { - auto searcher = Impl::createMultiSearcherInBigHaystack(needles); - const size_t haystack_string_size = haystack_offsets.size(); - res.resize(haystack_string_size); - size_t iteration = 0; - while (searcher.hasMoreToSearch()) - { - size_t prev_offset = 0; - for (size_t j = 0; j < haystack_string_size; ++j) - { - const auto * haystack = &haystack_data[prev_offset]; - const auto * haystack_end = haystack + haystack_offsets[j] - prev_offset - 1; - if (iteration == 0 || !res[j]) - res[j] = searcher.searchOne(haystack, haystack_end); - prev_offset = haystack_offsets[j]; - } - ++iteration; - } - } -}; - -template -struct MultiSearchFirstPositionImpl -{ - using ResultType = UInt64; - static constexpr bool is_using_hyperscan = false; - /// Variable for understanding, if we used offsets for the output, most - /// likely to determine whether the function returns ColumnVector of ColumnArray. 
- static constexpr bool is_column_array = false; - static auto getReturnType() { return std::make_shared>(); } - - static void vectorConstant( - const ColumnString::Chars & haystack_data, - const ColumnString::Offsets & haystack_offsets, - const std::vector & needles, - PaddedPODArray & res, - [[maybe_unused]] PaddedPODArray & offsets) - { - auto res_callback = [](const UInt8 * start, const UInt8 * end) -> UInt64 - { - return 1 + Impl::countChars(reinterpret_cast(start), reinterpret_cast(end)); - }; - auto searcher = Impl::createMultiSearcherInBigHaystack(needles); - const size_t haystack_string_size = haystack_offsets.size(); - res.resize(haystack_string_size); - size_t iteration = 0; - while (searcher.hasMoreToSearch()) - { - size_t prev_offset = 0; - for (size_t j = 0; j < haystack_string_size; ++j) - { - const auto * haystack = &haystack_data[prev_offset]; - const auto * haystack_end = haystack + haystack_offsets[j] - prev_offset - 1; - if (iteration == 0 || res[j] == 0) - res[j] = searcher.searchOneFirstPosition(haystack, haystack_end, res_callback); - else - { - UInt64 result = searcher.searchOneFirstPosition(haystack, haystack_end, res_callback); - if (result != 0) - res[j] = std::min(result, res[j]); - } - prev_offset = haystack_offsets[j]; - } - ++iteration; - } - } -}; - -template -struct MultiSearchFirstIndexImpl -{ - using ResultType = UInt64; - static constexpr bool is_using_hyperscan = false; - /// Variable for understanding, if we used offsets for the output, most - /// likely to determine whether the function returns ColumnVector of ColumnArray. 
- static constexpr bool is_column_array = false; - static auto getReturnType() { return std::make_shared>(); } - - static void vectorConstant( - const ColumnString::Chars & haystack_data, - const ColumnString::Offsets & haystack_offsets, - const std::vector & needles, - PaddedPODArray & res, - [[maybe_unused]] PaddedPODArray & offsets) - { - auto searcher = Impl::createMultiSearcherInBigHaystack(needles); - const size_t haystack_string_size = haystack_offsets.size(); - res.resize(haystack_string_size); - size_t iteration = 0; - while (searcher.hasMoreToSearch()) - { - size_t prev_offset = 0; - for (size_t j = 0; j < haystack_string_size; ++j) - { - const auto * haystack = &haystack_data[prev_offset]; - const auto * haystack_end = haystack + haystack_offsets[j] - prev_offset - 1; - /// hasMoreToSearch traverse needles in increasing order - if (iteration == 0 || res[j] == 0) - res[j] = searcher.searchOneFirstIndex(haystack, haystack_end); - prev_offset = haystack_offsets[j]; - } - ++iteration; - } - } -}; - -/** Token search the string, means that needle must be surrounded by some separator chars, like whitespace or puctuation. - */ -template -struct HasTokenImpl -{ - using ResultType = UInt8; - - static constexpr bool use_default_implementation_for_constants = true; - - static void vectorConstant( - const ColumnString::Chars & data, const ColumnString::Offsets & offsets, const std::string & pattern, PaddedPODArray & res) - { - if (offsets.empty()) - return; - - const UInt8 * begin = data.data(); - const UInt8 * pos = begin; - const UInt8 * end = pos + data.size(); - - /// The current index in the array of strings. - size_t i = 0; - - TokenSearcher searcher(pattern.data(), pattern.size(), end - pos); - - /// We will search for the next occurrence in all rows at once. - while (pos < end && end != (pos = searcher.search(pos, end - pos))) - { - /// Let's determine which index it refers to. 
- while (begin + offsets[i] <= pos) - { - res[i] = negate_result; - ++i; - } - - /// We check that the entry does not pass through the boundaries of strings. - if (pos + pattern.size() < begin + offsets[i]) - res[i] = !negate_result; - else - res[i] = negate_result; - - pos = begin + offsets[i]; - ++i; - } - - /// Tail, in which there can be no substring. - if (i < res.size()) - memset(&res[i], negate_result, (res.size() - i) * sizeof(res[0])); - } - - template - static void vectorVector(Args &&...) - { - throw Exception("Function 'hasToken' does not support non-constant needle argument", ErrorCodes::ILLEGAL_COLUMN); - } - - /// Search different needles in single haystack. - template - static void constantVector(Args &&...) - { - throw Exception("Function 'hasToken' does not support non-constant needle argument", ErrorCodes::ILLEGAL_COLUMN); - } - - template - static void vectorFixedConstant(Args &&...) - { - throw Exception("Functions 'hasToken' don't support FixedString haystack argument", ErrorCodes::ILLEGAL_COLUMN); - } -}; - - -struct NamePosition -{ - static constexpr auto name = "position"; -}; -struct NamePositionUTF8 -{ - static constexpr auto name = "positionUTF8"; -}; -struct NamePositionCaseInsensitive -{ - static constexpr auto name = "positionCaseInsensitive"; -}; -struct NamePositionCaseInsensitiveUTF8 -{ - static constexpr auto name = "positionCaseInsensitiveUTF8"; -}; -struct NameMultiSearchAllPositions -{ - static constexpr auto name = "multiSearchAllPositions"; -}; -struct NameMultiSearchAllPositionsUTF8 -{ - static constexpr auto name = "multiSearchAllPositionsUTF8"; -}; -struct NameMultiSearchAllPositionsCaseInsensitive -{ - static constexpr auto name = "multiSearchAllPositionsCaseInsensitive"; -}; -struct NameMultiSearchAllPositionsCaseInsensitiveUTF8 -{ - static constexpr auto name = "multiSearchAllPositionsCaseInsensitiveUTF8"; -}; -struct NameMultiSearchAny -{ - static constexpr auto name = "multiSearchAny"; -}; -struct 
NameMultiSearchAnyUTF8 -{ - static constexpr auto name = "multiSearchAnyUTF8"; -}; -struct NameMultiSearchAnyCaseInsensitive -{ - static constexpr auto name = "multiSearchAnyCaseInsensitive"; -}; -struct NameMultiSearchAnyCaseInsensitiveUTF8 -{ - static constexpr auto name = "multiSearchAnyCaseInsensitiveUTF8"; -}; -struct NameMultiSearchFirstIndex -{ - static constexpr auto name = "multiSearchFirstIndex"; -}; -struct NameMultiSearchFirstIndexUTF8 -{ - static constexpr auto name = "multiSearchFirstIndexUTF8"; -}; -struct NameMultiSearchFirstIndexCaseInsensitive -{ - static constexpr auto name = "multiSearchFirstIndexCaseInsensitive"; -}; -struct NameMultiSearchFirstIndexCaseInsensitiveUTF8 -{ - static constexpr auto name = "multiSearchFirstIndexCaseInsensitiveUTF8"; -}; -struct NameMultiSearchFirstPosition -{ - static constexpr auto name = "multiSearchFirstPosition"; -}; -struct NameMultiSearchFirstPositionUTF8 -{ - static constexpr auto name = "multiSearchFirstPositionUTF8"; -}; -struct NameMultiSearchFirstPositionCaseInsensitive -{ - static constexpr auto name = "multiSearchFirstPositionCaseInsensitive"; -}; -struct NameMultiSearchFirstPositionCaseInsensitiveUTF8 -{ - static constexpr auto name = "multiSearchFirstPositionCaseInsensitiveUTF8"; -}; - -struct NameHasToken -{ - static constexpr auto name = "hasToken"; -}; - -struct NameHasTokenCaseInsensitive -{ - static constexpr auto name = "hasTokenCaseInsensitive"; -}; - - -using FunctionPosition = FunctionsStringSearch, NamePosition>; -using FunctionPositionUTF8 = FunctionsStringSearch, NamePositionUTF8>; -using FunctionPositionCaseInsensitive = FunctionsStringSearch, NamePositionCaseInsensitive>; -using FunctionPositionCaseInsensitiveUTF8 - = FunctionsStringSearch, NamePositionCaseInsensitiveUTF8>; - -using FunctionMultiSearchAllPositions - = FunctionsMultiStringPosition, NameMultiSearchAllPositions>; -using FunctionMultiSearchAllPositionsUTF8 - = FunctionsMultiStringPosition, NameMultiSearchAllPositionsUTF8>; 
-using FunctionMultiSearchAllPositionsCaseInsensitive - = FunctionsMultiStringPosition, NameMultiSearchAllPositionsCaseInsensitive>; -using FunctionMultiSearchAllPositionsCaseInsensitiveUTF8 = FunctionsMultiStringPosition< - MultiSearchAllPositionsImpl, - NameMultiSearchAllPositionsCaseInsensitiveUTF8>; - -using FunctionMultiSearch = FunctionsMultiStringSearch, NameMultiSearchAny>; -using FunctionMultiSearchUTF8 = FunctionsMultiStringSearch, NameMultiSearchAnyUTF8>; -using FunctionMultiSearchCaseInsensitive - = FunctionsMultiStringSearch, NameMultiSearchAnyCaseInsensitive>; -using FunctionMultiSearchCaseInsensitiveUTF8 - = FunctionsMultiStringSearch, NameMultiSearchAnyCaseInsensitiveUTF8>; - -using FunctionMultiSearchFirstIndex - = FunctionsMultiStringSearch, NameMultiSearchFirstIndex>; -using FunctionMultiSearchFirstIndexUTF8 - = FunctionsMultiStringSearch, NameMultiSearchFirstIndexUTF8>; -using FunctionMultiSearchFirstIndexCaseInsensitive - = FunctionsMultiStringSearch, NameMultiSearchFirstIndexCaseInsensitive>; -using FunctionMultiSearchFirstIndexCaseInsensitiveUTF8 - = FunctionsMultiStringSearch, NameMultiSearchFirstIndexCaseInsensitiveUTF8>; - -using FunctionMultiSearchFirstPosition - = FunctionsMultiStringSearch, NameMultiSearchFirstPosition>; -using FunctionMultiSearchFirstPositionUTF8 - = FunctionsMultiStringSearch, NameMultiSearchFirstPositionUTF8>; -using FunctionMultiSearchFirstPositionCaseInsensitive - = FunctionsMultiStringSearch, NameMultiSearchFirstPositionCaseInsensitive>; -using FunctionMultiSearchFirstPositionCaseInsensitiveUTF8 = FunctionsMultiStringSearch< - MultiSearchFirstPositionImpl, - NameMultiSearchFirstPositionCaseInsensitiveUTF8>; - -using FunctionHasToken = FunctionsStringSearch, NameHasToken>; -using FunctionHasTokenCaseInsensitive - = FunctionsStringSearch, NameHasTokenCaseInsensitive>; - -void registerFunctionsStringSearch(FunctionFactory & factory) -{ - factory.registerFunction(FunctionFactory::CaseInsensitive); - 
factory.registerFunction(); - factory.registerFunction(); - factory.registerFunction(); - - factory.registerFunction(); - factory.registerFunction(); - factory.registerFunction(); - factory.registerFunction(); - - factory.registerFunction(); - factory.registerFunction(); - factory.registerFunction(); - factory.registerFunction(); - - factory.registerFunction(); - factory.registerFunction(); - factory.registerFunction(); - factory.registerFunction(); - - factory.registerFunction(); - factory.registerFunction(); - factory.registerFunction(); - factory.registerFunction(); - - factory.registerFunction(); - factory.registerFunction(); - - factory.registerAlias("locate", NamePosition::name, FunctionFactory::CaseInsensitive); -} -} From 1ff7c2ac432c4d5414303012ab7b2d754676bd7e Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Wed, 1 Apr 2020 23:16:50 +0300 Subject: [PATCH 3/6] Fix parallel distributed INSERT SELECT for remote table. --- dbms/Interpreters/InterpreterInsertQuery.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/Interpreters/InterpreterInsertQuery.cpp b/dbms/Interpreters/InterpreterInsertQuery.cpp index b4280ee20e6..39b99b10c0d 100644 --- a/dbms/Interpreters/InterpreterInsertQuery.cpp +++ b/dbms/Interpreters/InterpreterInsertQuery.cpp @@ -176,7 +176,7 @@ BlockIO InterpreterInsertQuery::execute() "Expected exactly one connection for shard " + toString(shard_info.shard_num), ErrorCodes::LOGICAL_ERROR); /// INSERT SELECT query returns empty block - auto in_stream = std::make_shared(*connections.front(), new_query_str, Block{}, context); + auto in_stream = std::make_shared(std::move(connections), new_query_str, Block{}, context); in_streams.push_back(in_stream); } out_streams.push_back(std::make_shared(Block())); From 4075f26583f847df0e7f5368375ff7060b59b83d Mon Sep 17 00:00:00 2001 From: BayoNet Date: Thu, 2 Apr 2020 20:55:11 +0300 Subject: [PATCH 4/6] DOCS-19: parseDateTimeBestEffort (#9994) Added the parseDateTimeBestEffort 
description in English and Russian language. --- .../functions/type_conversion_functions.md | 122 +++++++++++++++++- .../functions/type_conversion_functions.md | 118 ++++++++++++++++- 2 files changed, 233 insertions(+), 7 deletions(-) diff --git a/docs/en/query_language/functions/type_conversion_functions.md b/docs/en/query_language/functions/type_conversion_functions.md index d898bb517b9..40e0e3cc446 100644 --- a/docs/en/query_language/functions/type_conversion_functions.md +++ b/docs/en/query_language/functions/type_conversion_functions.md @@ -402,18 +402,128 @@ SELECT └───────────────────────────┴──────────────────────────────┘ ``` -## parseDateTimeBestEffort {#type_conversion_functions-parsedatetimebesteffort} +## parseDateTimeBestEffort {#parsedatetimebesteffort} -Parse a number type argument to a Date or DateTime type. -different from toDate and toDateTime, parseDateTimeBestEffort can progress more complex date format. -For more information, see the link: [Complex Date Format](https://xkcd.com/1179/) +Converts a date and time in the [String](../../data_types/string.md) representation to [DateTime](../../data_types/datetime.md#data_type-datetime) data type. + +The function parses [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601), [RFC 1123 - 5.2.14 RFC-822 Date and Time Specification](https://tools.ietf.org/html/rfc1123#page-55), ClickHouse's and some other date and time formats. + + +**Syntax** + +```sql +parseDateTimeBestEffort(time_string [, time_zone]); +``` + +**Parameters** + +- `time_string` — String containing a date and time to convert. [String](../../data_types/string.md). +- `time_zone` — Time zone. The function parses `time_string` according to the time zone. [String](../../data_types/string.md). + + +**Supported non-standard formats** + +- A string containing 9..10 digit [unix timestamp](https://en.wikipedia.org/wiki/Unix_time). 
+- A string with a date and a time component: `YYYYMMDDhhmmss`, `DD/MM/YYYY hh:mm:ss`, `DD-MM-YY hh:mm`, `YYYY-MM-DD hh:mm:ss`, etc. +- A string with a date, but no time component: `YYYY`, `YYYYMM`, `YYYY*MM`, `DD/MM/YYYY`, `DD-MM-YY`, etc. +- A string with a day and time: `DD`, `DD hh`, `DD hh:mm`. In this case `YYYY-MM` is substituted with `2000-01`. +- A string that includes the date and time along with time zone offset information: `YYYY-MM-DD hh:mm:ss ±h:mm`, etc. For example, `2020-12-12 17:36:00 -5:00`. + +For all formats with a separator, the function parses month names expressed by their full name or by the first three letters of the month name. Examples: `24/DEC/18`, `24-Dec-18`, `01-September-2018`. + +**Returned value** + +- `time_string` converted to the `DateTime` data type. + +**Examples** + +Query: + +```sql +SELECT parseDateTimeBestEffort('12/12/2020 12:12:57') +AS parseDateTimeBestEffort; +``` + +Result: + +```text +┌─parseDateTimeBestEffort─┐ +│ 2020-12-12 12:12:57 │ +└─────────────────────────┘ +``` + +Query: + +```sql +SELECT parseDateTimeBestEffort('Sat, 18 Aug 2018 07:22:16 GMT', 'Europe/Moscow') +AS parseDateTimeBestEffort +``` + +Result: + +```text +┌─parseDateTimeBestEffort─┐ +│ 2018-08-18 10:22:16 │ +└─────────────────────────┘ +``` + +Query: + +```sql +SELECT parseDateTimeBestEffort('1284101485', 'UTC') +AS parseDateTimeBestEffort +``` + +Result: + +```text +┌─parseDateTimeBestEffort─┐ +│ 2010-09-10 06:51:25 │ +└─────────────────────────┘ +``` + +Query: + +```sql +SELECT parseDateTimeBestEffort('2018-12-12 10:12:12') +AS parseDateTimeBestEffort +``` + +Result: + +```text +┌─parseDateTimeBestEffort─┐ +│ 2018-12-12 10:12:12 │ +└─────────────────────────┘ +``` + +Query: + +```sql +SELECT parseDateTimeBestEffort('10 20:19') +``` + +Result: + +```text +┌─parseDateTimeBestEffort('10 20:19')─┐ +│ 2000-01-10 20:19:00 │ +└─────────────────────────────────────┘ +``` + +**See Also** + +- [ISO 8601 announcement by @xkcd](https://xkcd.com/1179/) +- [RFC 
1123](https://tools.ietf.org/html/rfc1123) +- [toDate](#todate) +- [toDateTime](#todatetime) ## parseDateTimeBestEffortOrNull {#parsedatetimebesteffortornull} -Same as for [parseDateTimeBestEffort](#type_conversion_functions-parsedatetimebesteffort) except that it returns null when it encounters a date format that cannot be processed. +Same as for [parseDateTimeBestEffort](#parsedatetimebesteffort) except that it returns null when it encounters a date format that cannot be processed. ## parseDateTimeBestEffortOrZero {#parsedatetimebesteffortorzero} -Same as for [parseDateTimeBestEffort](#type_conversion_functions-parsedatetimebesteffort) except that it returns zero date or zero date time when it encounters a date format that cannot be processed. +Same as for [parseDateTimeBestEffort](#parsedatetimebesteffort) except that it returns zero date or zero date time when it encounters a date format that cannot be processed. [Original article](https://clickhouse.tech/docs/en/query_language/functions/type_conversion_functions/) diff --git a/docs/ru/query_language/functions/type_conversion_functions.md b/docs/ru/query_language/functions/type_conversion_functions.md index 9cb478a2a02..00582cd61cb 100644 --- a/docs/ru/query_language/functions/type_conversion_functions.md +++ b/docs/ru/query_language/functions/type_conversion_functions.md @@ -360,7 +360,7 @@ SELECT toTypeName(CAST(x, 'Nullable(UInt16)')) FROM t_null Приводит аргумент из числового типа данных к типу данных [IntervalType](../../data_types/special_data_types/interval.md). -**Синтксис** +**Синтаксис** ``` sql toIntervalSecond(number) @@ -399,4 +399,120 @@ SELECT └───────────────────────────┴──────────────────────────────┘ ``` +## parseDateTimeBestEffort {#parsedatetimebesteffort} + +Преобразует дату и время в [строковом](../../data_types/string.md) представлении к типу данных [DateTime](../../data_types/datetime.md#data_type-datetime). 
+ +Функция распознаёт форматы [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601), [RFC 1123 - 5.2.14 RFC-822 Date and Time Specification](https://tools.ietf.org/html/rfc1123#page-55), формат даты и времени ClickHouse, а также некоторые другие форматы. + +**Синтаксис** + +```sql +parseDateTimeBestEffort(time_string[, time_zone]); +``` + +**Параметры** + +- `time_string` — строка, содержащая дату и время для преобразования. [String](../../data_types/string.md). +- `time_zone` — часовой пояс. Функция анализирует `time_string` в соответствии с заданным часовым поясом. [String](../../data_types/string.md). + +**Поддерживаемые нестандартные форматы** + +- [Unix timestamp](https://ru.wikipedia.org/wiki/Unix-время) в строковом представлении. 9 или 10 символов. +- Строка с датой и временем: `YYYYMMDDhhmmss`, `DD/MM/YYYY hh:mm:ss`, `DD-MM-YY hh:mm`, `YYYY-MM-DD hh:mm:ss` и т.д. +- Строка с датой, но без времени: `YYYY`, `YYYYMM`, `YYYY*MM`, `DD/MM/YYYY`, `DD-MM-YY` и т.д. +- Строка с днём и временем: `DD`, `DD hh`, `DD hh:mm`. В этом случае `YYYY-MM` принимается равным `2000-01`. +- Строка, содержащая дату и время вместе с информацией о часовом поясе: `YYYY-MM-DD hh:mm:ss ±h:mm` и т.д. Например, `2020-12-12 17:36:00 -5:00`. + +Для всех форматов с разделителями функция распознаёт названия месяцев, заданные полным англоязычным названием или его первыми тремя буквами. Примеры: `24/DEC/18`, `24-Dec-18`, `01-September-2018`. + + +**Возвращаемое значение** + +- `time_string`, преобразованная к типу данных `DateTime`.
+ +**Примеры** + +Запрос: + +```sql +SELECT parseDateTimeBestEffort('12/12/2020 12:12:57') +AS parseDateTimeBestEffort; +``` + +Результат: + +```text +┌─parseDateTimeBestEffort─┐ +│ 2020-12-12 12:12:57 │ +└─────────────────────────┘ +``` + +Запрос: + +```sql +SELECT parseDateTimeBestEffort('Sat, 18 Aug 2018 07:22:16 GMT', 'Europe/Moscow') +AS parseDateTimeBestEffort +``` + +Результат: + +```text +┌─parseDateTimeBestEffort─┐ +│ 2018-08-18 10:22:16 │ +└─────────────────────────┘ +``` + +Запрос: + +```sql +SELECT parseDateTimeBestEffort('1284101485') +AS parseDateTimeBestEffort +``` + +Результат: + +```text +┌─parseDateTimeBestEffort─┐ +│ 2015-07-07 12:04:41 │ +└─────────────────────────┘ +``` + +Запрос: + +```sql +SELECT parseDateTimeBestEffort('2018-12-12 10:12:12') +AS parseDateTimeBestEffort +``` + +Результат: + +```text +┌─parseDateTimeBestEffort─┐ +│ 2018-12-12 10:12:12 │ +└─────────────────────────┘ +``` + +Запрос: + +```sql +SELECT parseDateTimeBestEffort('10 20:19') +``` + +Результат: + +```text +┌─parseDateTimeBestEffort('10 20:19')─┐ +│ 2000-01-10 20:19:00 │ +└─────────────────────────────────────┘ +``` + +**См. 
также** + +- [Информация о формате ISO 8601 от @xkcd](https://xkcd.com/1179/) +- [RFC 1123](https://tools.ietf.org/html/rfc1123) +- [toDate](#todate) +- [toDateTime](#todatetime) + + [Оригинальная статья](https://clickhouse.tech/docs/ru/query_language/functions/type_conversion_functions/) From cb933a3d1974720ba14404b422ec39a66568136e Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Fri, 3 Apr 2020 03:08:29 +0300 Subject: [PATCH 5/6] fix races in 01108_restart_replicas_rename_deadlock --- contrib/libcpuid/include/libcpuid/cpuid_main.c | 2 +- dbms/Common/getNumberOfPhysicalCPUCores.cpp | 1 - dbms/Interpreters/InterpreterSystemQuery.cpp | 2 +- 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/contrib/libcpuid/include/libcpuid/cpuid_main.c b/contrib/libcpuid/include/libcpuid/cpuid_main.c index 02a7cb7ad50..34457e297ca 100644 --- a/contrib/libcpuid/include/libcpuid/cpuid_main.c +++ b/contrib/libcpuid/include/libcpuid/cpuid_main.c @@ -38,7 +38,7 @@ /* Implementation: */ -static int _libcpiud_errno = ERR_OK; +_Thread_local int _libcpiud_errno = ERR_OK; int set_error(cpu_error_t err) { diff --git a/dbms/Common/getNumberOfPhysicalCPUCores.cpp b/dbms/Common/getNumberOfPhysicalCPUCores.cpp index 625c309bde5..32b70b76fbd 100644 --- a/dbms/Common/getNumberOfPhysicalCPUCores.cpp +++ b/dbms/Common/getNumberOfPhysicalCPUCores.cpp @@ -4,7 +4,6 @@ #include #if USE_CPUID # include -# include #elif USE_CPUINFO # include #endif diff --git a/dbms/Interpreters/InterpreterSystemQuery.cpp b/dbms/Interpreters/InterpreterSystemQuery.cpp index ff2001e4bc4..9a7d6ae7c5a 100644 --- a/dbms/Interpreters/InterpreterSystemQuery.cpp +++ b/dbms/Interpreters/InterpreterSystemQuery.cpp @@ -366,7 +366,7 @@ void InterpreterSystemQuery::restartReplicas(Context & system_context) for (auto iterator = database->getTablesIterator(system_context); iterator->isValid(); iterator->next()) { if (dynamic_cast(iterator->table().get())) - 
replica_names.emplace_back(iterator->table()->getStorageID()); + replica_names.emplace_back(StorageID{database->getDatabaseName(), iterator->name()}); } } From 696b7670d47d028ddd0ba58304496c1444d43ae5 Mon Sep 17 00:00:00 2001 From: Mikhail Date: Fri, 3 Apr 2020 10:27:50 +0300 Subject: [PATCH 6/6] Typo in docs (#10033) --- docs/ru/operations/table_engines/index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/operations/table_engines/index.md b/docs/ru/operations/table_engines/index.md index dd13c3db1ab..775164ccb52 100644 --- a/docs/ru/operations/table_engines/index.md +++ b/docs/ru/operations/table_engines/index.md @@ -35,7 +35,7 @@ - [StripeLog](stripelog.md) - [Log](log.md) -### Движки для интергации {#dvizhki-dlia-intergatsii} +### Движки для интеграции {#dvizhki-dlia-integratsii} Движки для связи с другими системами хранения и обработки данных.