From 0dc7cd7eb40bd3fb80e2c05871ce193a720b296c Mon Sep 17 00:00:00 2001 From: Michael Kolupaev Date: Wed, 14 Aug 2024 01:12:11 +0000 Subject: [PATCH 01/20] Update musl to have unwind info --- contrib/llvm-project-cmake/CMakeLists.txt | 6 ++++++ contrib/sysroot | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/contrib/llvm-project-cmake/CMakeLists.txt b/contrib/llvm-project-cmake/CMakeLists.txt index 76e620314a2..f5dce1c4178 100644 --- a/contrib/llvm-project-cmake/CMakeLists.txt +++ b/contrib/llvm-project-cmake/CMakeLists.txt @@ -140,6 +140,12 @@ if (CMAKE_CROSSCOMPILING) message (STATUS "CROSS COMPILING SET LLVM HOST TRIPLE ${LLVM_HOST_TRIPLE}") endif() +# llvm-project/llvm/cmake/config-ix.cmake does a weird thing: it defines _LARGEFILE64_SOURCE, +# then checks if lseek64() function exists, then undefines _LARGEFILE64_SOURCE. +# Then the actual code that uses this function *doesn't* define _LARGEFILE64_SOURCE, so lseek64() +# may not exist and compilation fails. This happens with musl. 
+add_compile_definitions("_LARGEFILE64_SOURCE") + add_subdirectory ("${LLVM_SOURCE_DIR}" "${LLVM_BINARY_DIR}") set_directory_properties (PROPERTIES diff --git a/contrib/sysroot b/contrib/sysroot index cc385041b22..866364fea62 160000 --- a/contrib/sysroot +++ b/contrib/sysroot @@ -1 +1 @@ -Subproject commit cc385041b226d1fc28ead14dbab5d40a5f821dd8 +Subproject commit 866364fea629aa3e519ec967836561a6b3b21885 From 0716b460db35524f7cfa82501b5d1d2812904688 Mon Sep 17 00:00:00 2001 From: Michael Kolupaev Date: Wed, 14 Aug 2024 03:00:51 +0000 Subject: [PATCH 02/20] Fix duplicate symbol errors --- base/harmful/harmful.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/base/harmful/harmful.c b/base/harmful/harmful.c index 54b552a84ea..19bb962999f 100644 --- a/base/harmful/harmful.c +++ b/base/harmful/harmful.c @@ -66,13 +66,11 @@ TRAP(gethostbyname) TRAP(gethostbyname2) TRAP(gethostent) TRAP(getlogin) -TRAP(getmntent) TRAP(getnetbyaddr) TRAP(getnetbyname) TRAP(getnetent) TRAP(getnetgrent) TRAP(getnetgrent_r) -TRAP(getopt) TRAP(getopt_long) TRAP(getopt_long_only) TRAP(getpass) @@ -133,7 +131,6 @@ TRAP(nrand48) TRAP(__ppc_get_timebase_freq) TRAP(ptsname) TRAP(putchar_unlocked) -TRAP(putenv) TRAP(pututline) TRAP(pututxline) TRAP(putwchar_unlocked) @@ -148,7 +145,6 @@ TRAP(sethostent) TRAP(sethostid) TRAP(setkey) //TRAP(setlocale) // Used by replxx at startup -TRAP(setlogmask) TRAP(setnetent) TRAP(setnetgrent) TRAP(setprotoent) @@ -203,7 +199,6 @@ TRAP(lgammal) TRAP(nftw) TRAP(nl_langinfo) TRAP(putc_unlocked) -TRAP(rand) /** In the current POSIX.1 specification (POSIX.1-2008), readdir() is not required to be thread-safe. However, in modern * implementations (including the glibc implementation), concurrent calls to readdir() that specify different directory streams * are thread-safe. 
In cases where multiple threads must read from the same directory stream, using readdir() with external @@ -288,4 +283,14 @@ TRAP(tss_get) TRAP(tss_set) TRAP(tss_delete) +#ifndef USE_MUSL +/// These produce duplicate symbol errors when statically linking with musl. +/// Maybe we can remove them from the musl fork. +TRAP(getopt) +TRAP(putenv) +TRAP(setlogmask) +TRAP(rand) +TRAP(getmntent) +#endif + #endif From c869b0651932bc61d5040395a3a2d0689485a5a1 Mon Sep 17 00:00:00 2001 From: Michael Kolupaev Date: Wed, 14 Aug 2024 03:13:57 +0000 Subject: [PATCH 03/20] Remove getpwuid() calls in Poco::PathImpl --- base/poco/Foundation/src/Path_UNIX.cpp | 18 +++++------------- contrib/sysroot | 2 +- 2 files changed, 6 insertions(+), 14 deletions(-) diff --git a/base/poco/Foundation/src/Path_UNIX.cpp b/base/poco/Foundation/src/Path_UNIX.cpp index 957a62db180..fb2ed71622f 100644 --- a/base/poco/Foundation/src/Path_UNIX.cpp +++ b/base/poco/Foundation/src/Path_UNIX.cpp @@ -48,25 +48,17 @@ std::string PathImpl::currentImpl() std::string PathImpl::homeImpl() { std::string path; -#if defined(_POSIX_C_SOURCE) || defined(_BSD_SOURCE) || defined(_POSIX_C_SOURCE) size_t buf_size = 1024; // Same as glibc use for getpwuid std::vector buf(buf_size); struct passwd res; struct passwd* pwd = nullptr; getpwuid_r(getuid(), &res, buf.data(), buf_size, &pwd); -#else - struct passwd* pwd = getpwuid(getuid()); -#endif if (pwd) path = pwd->pw_dir; else { -#if defined(_POSIX_C_SOURCE) || defined(_BSD_SOURCE) || defined(_POSIX_C_SOURCE) getpwuid_r(getuid(), &res, buf.data(), buf_size, &pwd); -#else - pwd = getpwuid(geteuid()); -#endif if (pwd) path = pwd->pw_dir; else @@ -82,7 +74,7 @@ std::string PathImpl::configHomeImpl() { std::string path = PathImpl::homeImpl(); std::string::size_type n = path.size(); - if (n > 0 && path[n - 1] == '/') + if (n > 0 && path[n - 1] == '/') #if POCO_OS == POCO_OS_MAC_OS_X path.append("Library/Preferences/"); #else @@ -97,7 +89,7 @@ std::string PathImpl::dataHomeImpl() { 
std::string path = PathImpl::homeImpl(); std::string::size_type n = path.size(); - if (n > 0 && path[n - 1] == '/') + if (n > 0 && path[n - 1] == '/') #if POCO_OS == POCO_OS_MAC_OS_X path.append("Library/Application Support/"); #else @@ -112,7 +104,7 @@ std::string PathImpl::cacheHomeImpl() { std::string path = PathImpl::homeImpl(); std::string::size_type n = path.size(); - if (n > 0 && path[n - 1] == '/') + if (n > 0 && path[n - 1] == '/') #if POCO_OS == POCO_OS_MAC_OS_X path.append("Library/Caches/"); #else @@ -127,7 +119,7 @@ std::string PathImpl::tempHomeImpl() { std::string path = PathImpl::homeImpl(); std::string::size_type n = path.size(); - if (n > 0 && path[n - 1] == '/') + if (n > 0 && path[n - 1] == '/') #if POCO_OS == POCO_OS_MAC_OS_X path.append("Library/Caches/"); #else @@ -159,7 +151,7 @@ std::string PathImpl::tempImpl() std::string PathImpl::configImpl() { std::string path; - + #if POCO_OS == POCO_OS_MAC_OS_X path = "/Library/Preferences/"; #else diff --git a/contrib/sysroot b/contrib/sysroot index 866364fea62..c2d74e21ba1 160000 --- a/contrib/sysroot +++ b/contrib/sysroot @@ -1 +1 @@ -Subproject commit 866364fea629aa3e519ec967836561a6b3b21885 +Subproject commit c2d74e21ba1b8a27966e344693e176f927e4eb50 From b1963738bdf8e02e78a0be1b2cce1b992a6d533c Mon Sep 17 00:00:00 2001 From: Michael Kolupaev Date: Thu, 15 Aug 2024 21:41:40 +0000 Subject: [PATCH 04/20] Rebase --- contrib/sysroot | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/sysroot b/contrib/sysroot index c2d74e21ba1..eb35c10ac57 160000 --- a/contrib/sysroot +++ b/contrib/sysroot @@ -1 +1 @@ -Subproject commit c2d74e21ba1b8a27966e344693e176f927e4eb50 +Subproject commit eb35c10ac5725da7ef4be88b303895e1b5d153be From b3a3d1e7202837e2da45b6775b4ea8c389c881a7 Mon Sep 17 00:00:00 2001 From: Michael Kolupaev Date: Fri, 16 Aug 2024 05:53:17 +0000 Subject: [PATCH 05/20] another rebase --- contrib/sysroot | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/contrib/sysroot b/contrib/sysroot index eb35c10ac57..73752937366 160000 --- a/contrib/sysroot +++ b/contrib/sysroot @@ -1 +1 @@ -Subproject commit eb35c10ac5725da7ef4be88b303895e1b5d153be +Subproject commit 737529373665bc067971ba098a12d6928580a0ae From 76960eff8005818db7f744115798888e72e4b2e5 Mon Sep 17 00:00:00 2001 From: Michael Kolupaev Date: Fri, 16 Aug 2024 18:38:03 +0000 Subject: [PATCH 06/20] This should work --- contrib/sysroot | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/sysroot b/contrib/sysroot index 73752937366..b0fce6066fc 160000 --- a/contrib/sysroot +++ b/contrib/sysroot @@ -1 +1 @@ -Subproject commit 737529373665bc067971ba098a12d6928580a0ae +Subproject commit b0fce6066fc2678fa17ee7a98f794da9da8492ff From 5b3ca6b2b916b63d12ddb402c0726d0c2e29c1b1 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Wed, 28 Aug 2024 22:14:43 +0800 Subject: [PATCH 07/20] allow unaligned arrays in arrayZip --- src/Functions/array/arrayZip.cpp | 114 +++++++++++++++++++++++-------- 1 file changed, 85 insertions(+), 29 deletions(-) diff --git a/src/Functions/array/arrayZip.cpp b/src/Functions/array/arrayZip.cpp index 6c6fff5926b..39c04264c84 100644 --- a/src/Functions/array/arrayZip.cpp +++ b/src/Functions/array/arrayZip.cpp @@ -1,7 +1,8 @@ -#include #include -#include +#include +#include #include +#include #include #include #include @@ -12,23 +13,22 @@ namespace DB namespace ErrorCodes { - extern const int ILLEGAL_TYPE_OF_ARGUMENT; - extern const int SIZES_OF_ARRAYS_DONT_MATCH; - extern const int TOO_FEW_ARGUMENTS_FOR_FUNCTION; - extern const int ILLEGAL_COLUMN; +extern const int ILLEGAL_TYPE_OF_ARGUMENT; +extern const int SIZES_OF_ARRAYS_DONT_MATCH; +extern const int TOO_FEW_ARGUMENTS_FOR_FUNCTION; +extern const int ILLEGAL_COLUMN; } /// arrayZip(['a', 'b', 'c'], ['d', 'e', 'f']) = [('a', 'd'), ('b', 'e'), ('c', 'f')] +/// arrayZipUnaligned(['a', 'b', 'c'], ['d', 'e']) = [('a', 'd'), ('b', 'e'), ('c', null)] +template class 
FunctionArrayZip : public IFunction { public: - static constexpr auto name = "arrayZip"; + static constexpr auto name = allow_unaligned ? "arrayZipUnaligned" : "arrayZip"; static FunctionPtr create(ContextPtr) { return std::make_shared(); } - String getName() const override - { - return name; - } + String getName() const override { return name; } bool isVariadic() const override { return true; } size_t getNumberOfArguments() const override { return 0; } @@ -39,8 +39,11 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { if (arguments.empty()) - throw Exception(ErrorCodes::TOO_FEW_ARGUMENTS_FOR_FUNCTION, - "Function {} needs at least one argument; passed {}." , getName(), arguments.size()); + throw Exception( + ErrorCodes::TOO_FEW_ARGUMENTS_FOR_FUNCTION, + "Function {} needs at least one argument; passed {}.", + getName(), + arguments.size()); DataTypes arguments_types; for (size_t index = 0; index < arguments.size(); ++index) @@ -48,16 +51,24 @@ public: const DataTypeArray * array_type = checkAndGetDataType(arguments[index].type.get()); if (!array_type) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Argument {} of function {} must be array. Found {} instead.", - toString(index + 1), getName(), arguments[0].type->getName()); + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Argument {} of function {} must be array. 
Found {} instead.", + toString(index + 1), + getName(), + arguments[0].type->getName()); - arguments_types.emplace_back(array_type->getNestedType()); + auto nested_type = array_type->getNestedType(); + if constexpr (allow_unaligned) + nested_type = makeNullable(nested_type); + arguments_types.emplace_back(nested_type); } return std::make_shared(std::make_shared(arguments_types)); } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr + executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & /*result_type*/, size_t input_rows_count) const override { size_t num_arguments = arguments.size(); @@ -68,12 +79,19 @@ public: { /// Constant columns cannot be inside tuple. It's only possible to have constant tuple as a whole. ColumnPtr holder = arguments[i].column->convertToFullColumnIfConst(); - const ColumnArray * column_array = checkAndGetColumn(holder.get()); - if (!column_array) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Argument {} of function {} must be array. Found column {} instead.", - i + 1, getName(), holder->getName()); + throw Exception( + ErrorCodes::ILLEGAL_COLUMN, + "Argument {} of function {} must be array. 
Found column {} instead.", + i + 1, + getName(), + holder->getName()); + + tuple_columns[i] = column_array->getDataPtr(); + + if constexpr (allow_unaligned) + tuple_columns[i] = makeNullable(tuple_columns[i]); if (i == 0) { @@ -81,23 +99,61 @@ public: } else if (!column_array->hasEqualOffsets(static_cast(*first_array_column))) { - throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH, - "The argument 1 and argument {} of function {} have different array sizes", - i + 1, getName()); + if constexpr (allow_unaligned) + return executeUnaligned(static_cast(*first_array_column), *column_array, input_rows_count); + else + throw Exception( + ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH, + "The argument 1 and argument {} of function {} have different array sizes", + i + 1, + getName()); } - - tuple_columns[i] = column_array->getDataPtr(); } return ColumnArray::create( - ColumnTuple::create(tuple_columns), static_cast(*first_array_column).getOffsetsPtr()); + ColumnTuple::create(std::move(tuple_columns)), static_cast(*first_array_column).getOffsetsPtr()); + } + +private: + ColumnPtr + executeUnaligned(const ColumnArray & first_array_colmn, const ColumnArray & second_array_column, size_t input_rows_count) const + { + const auto & first_data = first_array_colmn.getDataPtr(); + const auto & second_data = second_array_column.getDataPtr(); + const auto & nullable_first_data = makeNullable(first_data); + const auto & nullable_second_data = makeNullable(second_data); + auto res_first_data = nullable_first_data->cloneEmpty(); + auto res_second_data = nullable_second_data->cloneEmpty(); + auto res_offsets_column = ColumnArray::ColumnOffsets::create(input_rows_count); + auto & res_offsets = assert_cast(*res_offsets_column).getData(); + + const auto & first_offsets = first_array_colmn.getOffsets(); + const auto & second_offsets = second_array_column.getOffsets(); + for (size_t i = 0; i < input_rows_count; ++i) + { + size_t first_size = first_offsets[i] - first_offsets[i - 1]; + size_t 
second_size = second_offsets[i] - second_offsets[i - 1]; + + res_first_data->insertRangeFrom(*nullable_first_data, first_offsets[i - 1], first_size); + res_second_data->insertRangeFrom(*nullable_second_data, second_offsets[i - 1], second_size); + + if (first_size < second_size) + res_first_data->insertManyDefaults(second_size - first_size); + else if (first_size > second_size) + res_second_data->insertManyDefaults(first_size - second_size); + + res_offsets[i] = std::max(first_size, second_size); + } + + Columns tuple_columns{std::move(res_first_data), std::move(res_second_data)}; + return ColumnArray::create(ColumnTuple::create(std::move(tuple_columns)), std::move(res_offsets_column)); } }; REGISTER_FUNCTION(ArrayZip) { - factory.registerFunction(); + factory.registerFunction>(); + factory.registerFunction>(); } } - From 1cd4af1564d4dfbd1592708a032b126d1ee1ab50 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Mon, 2 Sep 2024 11:55:41 +0800 Subject: [PATCH 08/20] add new function arrayZipUnaligned --- .../functions/array-functions.md | 38 +++++++ src/Functions/array/arrayZip.cpp | 102 +++++++++++------- .../03230_array_zip_unaligned.reference | 5 + .../0_stateless/03230_array_zip_unaligned.sql | 13 +++ 4 files changed, 118 insertions(+), 40 deletions(-) create mode 100644 tests/queries/0_stateless/03230_array_zip_unaligned.reference create mode 100644 tests/queries/0_stateless/03230_array_zip_unaligned.sql diff --git a/docs/en/sql-reference/functions/array-functions.md b/docs/en/sql-reference/functions/array-functions.md index 1b52440903d..ad971ae7554 100644 --- a/docs/en/sql-reference/functions/array-functions.md +++ b/docs/en/sql-reference/functions/array-functions.md @@ -2035,6 +2035,7 @@ Query: SELECT arrayZip(['a', 'b', 'c'], [5, 2, 1]); ``` + Result: ``` text @@ -2043,6 +2044,43 @@ Result: └──────────────────────────────────────┘ ``` +## arrayZipUnaligned + +Combines multiple arrays into a single array, allowing for unaligned arrays. 
The resulting array contains the corresponding elements of the source arrays grouped into tuples in the listed order of arguments. + +**Syntax** + +``` sql +arrayZipUnaligned(arr1, arr2, ..., arrN) +``` + +**Arguments** + +- `arrN` — [Array](../data-types/array.md). + +The function can take any number of arrays of different types. + +**Returned value** + +- Array with elements from the source arrays grouped into [tuples](../data-types/tuple.md). Data types in the tuple are the [Nullable](../data-types/nullable.md) versions of the element types of the input arrays, in the same order as the arrays are passed. [Array](../data-types/array.md). If the arrays have different sizes, the shorter arrays will be padded with `NULL` values. + +**Example** + +Query: + +``` sql +SELECT arrayZipUnaligned(['a'], [1, 2, 3]); +``` + +Result: + +``` text +┌─arrayZipUnaligned(['a'], [1, 2, 3])─┐ +│ [('a',1),(NULL,2),(NULL,3)] │ +└─────────────────────────────────────┘ +``` + + ## arrayAUC Calculate AUC (Area Under the Curve, which is a concept in machine learning, see more details: ). diff --git a/src/Functions/array/arrayZip.cpp b/src/Functions/array/arrayZip.cpp index 39c04264c84..494ee0d9590 100644 --- a/src/Functions/array/arrayZip.cpp +++ b/src/Functions/array/arrayZip.cpp @@ -71,14 +71,17 @@ public: executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & /*result_type*/, size_t input_rows_count) const override { size_t num_arguments = arguments.size(); - - ColumnPtr first_array_column; + Columns holders(num_arguments); Columns tuple_columns(num_arguments); + bool has_unaligned = false; + size_t unaligned_index = 0; for (size_t i = 0; i < num_arguments; ++i) { /// Constant columns cannot be inside tuple. It's only possible to have constant tuple as a whole. 
ColumnPtr holder = arguments[i].column->convertToFullColumnIfConst(); + holders[i] = holder; + const ColumnArray * column_array = checkAndGetColumn(holder.get()); if (!column_array) throw Exception( @@ -87,18 +90,11 @@ public: i + 1, getName(), holder->getName()); - tuple_columns[i] = column_array->getDataPtr(); - if constexpr (allow_unaligned) - tuple_columns[i] = makeNullable(tuple_columns[i]); - - if (i == 0) - { - first_array_column = holder; - } - else if (!column_array->hasEqualOffsets(static_cast(*first_array_column))) + if (i && !column_array->hasEqualOffsets(static_cast(*holders[0]))) { + /* if constexpr (allow_unaligned) return executeUnaligned(static_cast(*first_array_column), *column_array, input_rows_count); else @@ -107,46 +103,72 @@ public: "The argument 1 and argument {} of function {} have different array sizes", i + 1, getName()); + */ + has_unaligned = true; + unaligned_index = i; } } - return ColumnArray::create( - ColumnTuple::create(std::move(tuple_columns)), static_cast(*first_array_column).getOffsetsPtr()); + if constexpr (!allow_unaligned) + { + if (has_unaligned) + throw Exception( + ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH, + "The argument 1 and argument {} of function {} have different array sizes", + unaligned_index + 1, + getName()); + else + return ColumnArray::create( + ColumnTuple::create(std::move(tuple_columns)), static_cast(*holders[0]).getOffsetsPtr()); + } + else + return executeUnaligned(holders, tuple_columns, input_rows_count); } private: - ColumnPtr - executeUnaligned(const ColumnArray & first_array_colmn, const ColumnArray & second_array_column, size_t input_rows_count) const + ColumnPtr executeUnaligned(const Columns & holders, Columns & tuple_columns, size_t input_rows_count) const { - const auto & first_data = first_array_colmn.getDataPtr(); - const auto & second_data = second_array_column.getDataPtr(); - const auto & nullable_first_data = makeNullable(first_data); - const auto & nullable_second_data = 
makeNullable(second_data); - auto res_first_data = nullable_first_data->cloneEmpty(); - auto res_second_data = nullable_second_data->cloneEmpty(); - auto res_offsets_column = ColumnArray::ColumnOffsets::create(input_rows_count); - auto & res_offsets = assert_cast(*res_offsets_column).getData(); + std::vector array_columns(holders.size()); + for (size_t i = 0; i < holders.size(); ++i) + array_columns[i] = checkAndGetColumn(holders[i].get()); - const auto & first_offsets = first_array_colmn.getOffsets(); - const auto & second_offsets = second_array_column.getOffsets(); - for (size_t i = 0; i < input_rows_count; ++i) + MutableColumns res_tuple_columns(tuple_columns.size()); + for (size_t i = 0; i < tuple_columns.size(); ++i) { - size_t first_size = first_offsets[i] - first_offsets[i - 1]; - size_t second_size = second_offsets[i] - second_offsets[i - 1]; - - res_first_data->insertRangeFrom(*nullable_first_data, first_offsets[i - 1], first_size); - res_second_data->insertRangeFrom(*nullable_second_data, second_offsets[i - 1], second_size); - - if (first_size < second_size) - res_first_data->insertManyDefaults(second_size - first_size); - else if (first_size > second_size) - res_second_data->insertManyDefaults(first_size - second_size); - - res_offsets[i] = std::max(first_size, second_size); + tuple_columns[i] = makeNullable(tuple_columns[i]); + res_tuple_columns[i] = tuple_columns[i]->cloneEmpty(); + res_tuple_columns[i]->reserve(tuple_columns[i]->size()); } - Columns tuple_columns{std::move(res_first_data), std::move(res_second_data)}; - return ColumnArray::create(ColumnTuple::create(std::move(tuple_columns)), std::move(res_offsets_column)); + auto res_offsets_column = ColumnArray::ColumnOffsets::create(input_rows_count); + auto & res_offsets = assert_cast(*res_offsets_column).getData(); + for (size_t row_i = 0; row_i < input_rows_count; ++row_i) + { + size_t max_size = 0; + for (size_t arg_i = 0; arg_i < holders.size(); ++arg_i) + { + const auto * array_column = 
array_columns[arg_i]; + const auto & offsets = array_column->getOffsets(); + size_t array_offset = offsets[row_i - 1]; + size_t array_size = offsets[row_i] - array_offset; + + res_tuple_columns[arg_i]->insertRangeFrom(*tuple_columns[arg_i], array_offset, array_size); + max_size = std::max(max_size, array_size); + } + + for (size_t arg_i = 0; arg_i < holders.size(); ++arg_i) + { + const auto * array_column = array_columns[arg_i]; + const auto & offsets = array_column->getOffsets(); + size_t array_offset = offsets[row_i - 1]; + size_t array_size = offsets[row_i] - array_offset; + + res_tuple_columns[arg_i]->insertManyDefaults(max_size - array_size); + res_offsets[row_i] = max_size; + } + } + + return ColumnArray::create(ColumnTuple::create(std::move(res_tuple_columns)), std::move(res_offsets_column)); } }; diff --git a/tests/queries/0_stateless/03230_array_zip_unaligned.reference b/tests/queries/0_stateless/03230_array_zip_unaligned.reference new file mode 100644 index 00000000000..6da23cd3f37 --- /dev/null +++ b/tests/queries/0_stateless/03230_array_zip_unaligned.reference @@ -0,0 +1,5 @@ +[('a','d'),('b','e'),('c','f')] Array(Tuple(Nullable(String), Nullable(String))) +[('a','d','g'),('b','e','h'),('c','f','i')] +[('a','d'),('b','e'),('c','f'),(NULL,'g')] +[('a',1),(NULL,2),(NULL,3)] +[('a',1,1.1),('b',2,2.2),('c',NULL,3.3),(NULL,NULL,4.4)] diff --git a/tests/queries/0_stateless/03230_array_zip_unaligned.sql b/tests/queries/0_stateless/03230_array_zip_unaligned.sql new file mode 100644 index 00000000000..6fa068d53cd --- /dev/null +++ b/tests/queries/0_stateless/03230_array_zip_unaligned.sql @@ -0,0 +1,13 @@ +SELECT arrayZipUnaligned(['a', 'b', 'c'], ['d', 'e', 'f']) as x, toTypeName(x); + +SELECT arrayZipUnaligned(['a', 'b', 'c'], ['d', 'e', 'f'], ['g', 'h', 'i']); + +SELECT arrayZipUnaligned(); -- { serverError TOO_FEW_ARGUMENTS_FOR_FUNCTION } + +SELECT arrayZipUnaligned('a', 'b', 'c'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } + +SELECT arrayZipUnaligned(['a', 
'b', 'c'], ['d', 'e', 'f', 'g']); + +SELECT arrayZipUnaligned(['a'], [1, 2, 3]); + +SELECT arrayZipUnaligned(['a', 'b', 'c'], [1, 2], [1.1, 2.2, 3.3, 4.4]); From 450730cef800d08104c753335b3ea2d94a290dbf Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Mon, 2 Sep 2024 12:16:41 +0800 Subject: [PATCH 09/20] fix failed uts --- src/Functions/array/arrayZip.cpp | 17 +++++++++++++++-- ...l_new_functions_must_be_documented.reference | 1 - 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/src/Functions/array/arrayZip.cpp b/src/Functions/array/arrayZip.cpp index 494ee0d9590..11ea824d9a5 100644 --- a/src/Functions/array/arrayZip.cpp +++ b/src/Functions/array/arrayZip.cpp @@ -174,8 +174,21 @@ private: REGISTER_FUNCTION(ArrayZip) { - factory.registerFunction>(); - factory.registerFunction>(); + factory.registerFunction>( + {.description = R"( +Combines multiple arrays into a single array. The resulting array contains the corresponding elements of the source arrays grouped into tuples in the listed order of arguments. +)", + .categories{"String"}}); + + factory.registerFunction>( + {.description = R"( +Combines multiple arrays into a single array, allowing for unaligned arrays. The resulting array contains the corresponding elements of the source arrays grouped into tuples in the listed order of arguments. + +If the arrays have different sizes, the shorter arrays will be padded with `null` values. 
+)", + .categories{"String"}} + + ); } } diff --git a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference index 0980e25b70f..1368bf530c8 100644 --- a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference +++ b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference @@ -143,7 +143,6 @@ arrayStringConcat arraySum arrayUniq arrayWithConstant -arrayZip asinh assumeNotNull atan From bd0ddf85eb987517e99edc467a2206ad4033f64f Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Mon, 2 Sep 2024 14:16:20 +0800 Subject: [PATCH 10/20] fix style --- utils/check-style/aspell-ignore/en/aspell-dict.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index 7f90fc4664e..cabfd683ead 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -1208,6 +1208,7 @@ arraySum arrayUniq arrayWithConstant arrayZip +arrayZipUnaligned ascii asin asinh From aaf62aca737439abeb8109944b0becf88093e96d Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Tue, 3 Sep 2024 11:59:54 +0800 Subject: [PATCH 11/20] fix uts --- src/Functions/array/arrayZip.cpp | 15 ++++----------- .../03230_array_zip_unaligned.reference | 3 +++ .../0_stateless/03230_array_zip_unaligned.sql | 2 ++ 3 files changed, 9 insertions(+), 11 deletions(-) diff --git a/src/Functions/array/arrayZip.cpp b/src/Functions/array/arrayZip.cpp index 11ea824d9a5..cc66c560800 100644 --- a/src/Functions/array/arrayZip.cpp +++ b/src/Functions/array/arrayZip.cpp @@ -94,16 +94,6 @@ public: if (i && !column_array->hasEqualOffsets(static_cast(*holders[0]))) { - /* - if constexpr (allow_unaligned) - return executeUnaligned(static_cast(*first_array_column), *column_array, input_rows_count); - else - throw 
Exception( - ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH, - "The argument 1 and argument {} of function {} have different array sizes", - i + 1, - getName()); - */ has_unaligned = true; unaligned_index = i; } @@ -142,6 +132,7 @@ private: auto res_offsets_column = ColumnArray::ColumnOffsets::create(input_rows_count); auto & res_offsets = assert_cast(*res_offsets_column).getData(); + size_t curr_offset = 0; for (size_t row_i = 0; row_i < input_rows_count; ++row_i) { size_t max_size = 0; @@ -164,8 +155,10 @@ private: size_t array_size = offsets[row_i] - array_offset; res_tuple_columns[arg_i]->insertManyDefaults(max_size - array_size); - res_offsets[row_i] = max_size; } + + curr_offset += max_size; + res_offsets[row_i] = curr_offset; } return ColumnArray::create(ColumnTuple::create(std::move(res_tuple_columns)), std::move(res_offsets_column)); diff --git a/tests/queries/0_stateless/03230_array_zip_unaligned.reference b/tests/queries/0_stateless/03230_array_zip_unaligned.reference index 6da23cd3f37..7067f8788e5 100644 --- a/tests/queries/0_stateless/03230_array_zip_unaligned.reference +++ b/tests/queries/0_stateless/03230_array_zip_unaligned.reference @@ -3,3 +3,6 @@ [('a','d'),('b','e'),('c','f'),(NULL,'g')] [('a',1),(NULL,2),(NULL,3)] [('a',1,1.1),('b',2,2.2),('c',NULL,3.3),(NULL,NULL,4.4)] +[('g'),('h'),('i')] +[('g'),('h'),('i')] +[('g'),('h'),('i')] diff --git a/tests/queries/0_stateless/03230_array_zip_unaligned.sql b/tests/queries/0_stateless/03230_array_zip_unaligned.sql index 6fa068d53cd..90b7aa47bfd 100644 --- a/tests/queries/0_stateless/03230_array_zip_unaligned.sql +++ b/tests/queries/0_stateless/03230_array_zip_unaligned.sql @@ -11,3 +11,5 @@ SELECT arrayZipUnaligned(['a', 'b', 'c'], ['d', 'e', 'f', 'g']); SELECT arrayZipUnaligned(['a'], [1, 2, 3]); SELECT arrayZipUnaligned(['a', 'b', 'c'], [1, 2], [1.1, 2.2, 3.3, 4.4]); + +SELECT arrayZipUnaligned(materialize(['g', 'h', 'i'])) from numbers(3); From 7e03621a409c5bce283b71269e50c41ce193a347 Mon Sep 17 00:00:00 
2001 From: Julia Kartseva Date: Wed, 4 Sep 2024 05:43:56 +0000 Subject: [PATCH 12/20] Fix INFILE file format detection for async inserts --- src/Interpreters/AsynchronousInsertQueue.cpp | 13 +++++++++- ...03233_async_insert_infile_format.reference | 2 ++ .../03233_async_insert_infile_format.sh | 25 +++++++++++++++++++ 3 files changed, 39 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/03233_async_insert_infile_format.reference create mode 100755 tests/queries/0_stateless/03233_async_insert_infile_format.sh diff --git a/src/Interpreters/AsynchronousInsertQueue.cpp b/src/Interpreters/AsynchronousInsertQueue.cpp index 461700dcb75..25cd1d0bfa2 100644 --- a/src/Interpreters/AsynchronousInsertQueue.cpp +++ b/src/Interpreters/AsynchronousInsertQueue.cpp @@ -315,7 +315,18 @@ void AsynchronousInsertQueue::preprocessInsertQuery(const ASTPtr & query, const auto sample_block = InterpreterInsertQuery::getSampleBlock(insert_query, table, table->getInMemoryMetadataPtr(), query_context); if (!FormatFactory::instance().isInputFormat(insert_query.format)) - throw Exception(ErrorCodes::UNKNOWN_FORMAT, "Unknown input format {}", insert_query.format); + { + if (insert_query.format.empty() && insert_query.infile) + { + const auto & in_file_node = insert_query.infile->as(); + const auto in_file = in_file_node.value.safeGet(); + const auto in_file_format = FormatFactory::instance().getFormatFromFileName(in_file); + if (!FormatFactory::instance().isInputFormat(in_file_format)) + throw Exception(ErrorCodes::UNKNOWN_FORMAT, "Unknown input INFILE format {}", in_file_format); + } + else + throw Exception(ErrorCodes::UNKNOWN_FORMAT, "Unknown input format {}", insert_query.format); + } /// For table functions we check access while executing /// InterpreterInsertQuery::getTable() -> ITableFunction::execute(). 
diff --git a/tests/queries/0_stateless/03233_async_insert_infile_format.reference b/tests/queries/0_stateless/03233_async_insert_infile_format.reference new file mode 100644 index 00000000000..b236c1b8330 --- /dev/null +++ b/tests/queries/0_stateless/03233_async_insert_infile_format.reference @@ -0,0 +1,2 @@ +1 ClickHouse +2 HelloWorld diff --git a/tests/queries/0_stateless/03233_async_insert_infile_format.sh b/tests/queries/0_stateless/03233_async_insert_infile_format.sh new file mode 100755 index 00000000000..29ec87799bb --- /dev/null +++ b/tests/queries/0_stateless/03233_async_insert_infile_format.sh @@ -0,0 +1,25 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +function cleanup() +{ + [ -e "${CLICKHOUSE_TMP}"/test_infile.csv ] && rm "${CLICKHOUSE_TMP}"/test_infile.csv +} + +trap cleanup EXIT + +cleanup + +echo -e "id,\"word\"\n1,\"ClickHouse\"\n2,\"HelloWorld\"" > "${CLICKHOUSE_TMP}"/test_infile.csv + +${CLICKHOUSE_CLIENT} --query "DROP TABLE IF EXISTS async_insert_infile_data;" +${CLICKHOUSE_CLIENT} --query "CREATE TABLE async_insert_infile_data (id UInt32, word String) ENGINE=Memory();" +${CLICKHOUSE_CLIENT} --query "INSERT INTO async_insert_infile_data FROM INFILE '${CLICKHOUSE_TMP}/test_infile.csv' SETTINGS async_insert=1;" +${CLICKHOUSE_CLIENT} --query "SELECT * FROM async_insert_infile_data ORDER BY id;" + +${CLICKHOUSE_CLIENT} --query "INSERT INTO async_insert_infile_data FROM INFILE '${CLICKHOUSE_TMP}/test_infile.csv' FORMAT NotExists SETTINGS async_insert=1;" 2>&1 | grep -q "UNKOWN_FORMAT" + +${CLICKHOUSE_CLIENT} --query "DROP TABLE async_insert_infile_data SYNC;" From ac861768bcfeab9ae830651e35da1bb40398b0a2 Mon Sep 17 00:00:00 2001 From: Michael Kolupaev Date: Wed, 4 Sep 2024 07:53:41 +0000 Subject: [PATCH 13/20] Final submodule commit --- contrib/sysroot | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/sysroot 
b/contrib/sysroot index b0fce6066fc..5be834147d5 160000 --- a/contrib/sysroot +++ b/contrib/sysroot @@ -1 +1 @@ -Subproject commit b0fce6066fc2678fa17ee7a98f794da9da8492ff +Subproject commit 5be834147d5b5dd77ca2b821f356982029320513 From 73bbc16250d6b5dffa6a6bf346552706a89fe6c6 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Wed, 4 Sep 2024 12:39:15 +0200 Subject: [PATCH 14/20] Update tests/queries/0_stateless/03233_async_insert_infile_format.sh Co-authored-by: vdimir --- tests/queries/0_stateless/03233_async_insert_infile_format.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/03233_async_insert_infile_format.sh b/tests/queries/0_stateless/03233_async_insert_infile_format.sh index 29ec87799bb..ae8bf262833 100755 --- a/tests/queries/0_stateless/03233_async_insert_infile_format.sh +++ b/tests/queries/0_stateless/03233_async_insert_infile_format.sh @@ -20,6 +20,6 @@ ${CLICKHOUSE_CLIENT} --query "CREATE TABLE async_insert_infile_data (id UInt32, ${CLICKHOUSE_CLIENT} --query "INSERT INTO async_insert_infile_data FROM INFILE '${CLICKHOUSE_TMP}/test_infile.csv' SETTINGS async_insert=1;" ${CLICKHOUSE_CLIENT} --query "SELECT * FROM async_insert_infile_data ORDER BY id;" -${CLICKHOUSE_CLIENT} --query "INSERT INTO async_insert_infile_data FROM INFILE '${CLICKHOUSE_TMP}/test_infile.csv' FORMAT NotExists SETTINGS async_insert=1;" 2>&1 | grep -q "UNKOWN_FORMAT" +${CLICKHOUSE_CLIENT} --query "INSERT INTO async_insert_infile_data FROM INFILE '${CLICKHOUSE_TMP}/test_infile.csv' FORMAT NotExists SETTINGS async_insert=1;" 2>&1 | grep -q "UNKNOWN_FORMAT" && echo OK || echo FAIL ${CLICKHOUSE_CLIENT} --query "DROP TABLE async_insert_infile_data SYNC;" From fd05f30ee1009a91a8a18756c620da9316b09647 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Wed, 4 Sep 2024 21:10:32 +0800 Subject: [PATCH 15/20] change as request --- src/Functions/array/arrayZip.cpp | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) 
diff --git a/src/Functions/array/arrayZip.cpp b/src/Functions/array/arrayZip.cpp index cc66c560800..6e1cc0f7788 100644 --- a/src/Functions/array/arrayZip.cpp +++ b/src/Functions/array/arrayZip.cpp @@ -112,20 +112,25 @@ public: ColumnTuple::create(std::move(tuple_columns)), static_cast(*holders[0]).getOffsetsPtr()); } else - return executeUnaligned(holders, tuple_columns, input_rows_count); + return executeUnaligned(holders, tuple_columns, input_rows_count, has_unaligned); } private: - ColumnPtr executeUnaligned(const Columns & holders, Columns & tuple_columns, size_t input_rows_count) const + ColumnPtr executeUnaligned(const Columns & holders, Columns & tuple_columns, size_t input_rows_count, bool has_unaligned) const { std::vector array_columns(holders.size()); for (size_t i = 0; i < holders.size(); ++i) array_columns[i] = checkAndGetColumn(holders[i].get()); + for (auto & tuple_column : tuple_columns) + tuple_column = makeNullable(tuple_column); + + if (!has_unaligned) + return ColumnArray::create(ColumnTuple::create(std::move(tuple_columns)), array_columns[0]->getOffsetsPtr()); + MutableColumns res_tuple_columns(tuple_columns.size()); for (size_t i = 0; i < tuple_columns.size(); ++i) { - tuple_columns[i] = makeNullable(tuple_columns[i]); res_tuple_columns[i] = tuple_columns[i]->cloneEmpty(); res_tuple_columns[i]->reserve(tuple_columns[i]->size()); } From 4634b83ab51af82f3e15fa26139f6189993591bd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?= Date: Wed, 4 Sep 2024 18:44:35 +0200 Subject: [PATCH 16/20] Revert "CREATE TABLE AS copy PRIMARY KEY, ORDER BY, and similar clauses." 
--- src/Interpreters/InterpreterCreateQuery.cpp | 13 ---- ...te_table_as_with_sorting_clauses.reference | 70 ------------------- ...6_create_table_as_with_sorting_clauses.sql | 37 ---------- 3 files changed, 120 deletions(-) delete mode 100644 tests/queries/0_stateless/01056_create_table_as_with_sorting_clauses.reference delete mode 100644 tests/queries/0_stateless/01056_create_table_as_with_sorting_clauses.sql diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index e9f40bdbaf5..80cb0510b35 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -821,19 +821,6 @@ InterpreterCreateQuery::TableProperties InterpreterCreateQuery::getTableProperti { properties.indices = as_storage_metadata->getSecondaryIndices(); properties.projections = as_storage_metadata->getProjections().clone(); - - /// CREATE TABLE AS should copy PRIMARY KEY, ORDER BY, and similar clauses. - if (!create.storage->primary_key && as_storage_metadata->isPrimaryKeyDefined() && as_storage_metadata->hasPrimaryKey()) - create.storage->set(create.storage->primary_key, as_storage_metadata->getPrimaryKeyAST()->clone()); - - if (!create.storage->partition_by && as_storage_metadata->isPartitionKeyDefined() && as_storage_metadata->hasPartitionKey()) - create.storage->set(create.storage->partition_by, as_storage_metadata->getPartitionKeyAST()->clone()); - - if (!create.storage->order_by && as_storage_metadata->isSortingKeyDefined() && as_storage_metadata->hasSortingKey()) - create.storage->set(create.storage->order_by, as_storage_metadata->getSortingKeyAST()->clone()); - - if (!create.storage->sample_by && as_storage_metadata->isSamplingKeyDefined() && as_storage_metadata->hasSamplingKey()) - create.storage->set(create.storage->sample_by, as_storage_metadata->getSamplingKeyAST()->clone()); } else { diff --git a/tests/queries/0_stateless/01056_create_table_as_with_sorting_clauses.reference 
b/tests/queries/0_stateless/01056_create_table_as_with_sorting_clauses.reference deleted file mode 100644 index cebb99f005e..00000000000 --- a/tests/queries/0_stateless/01056_create_table_as_with_sorting_clauses.reference +++ /dev/null @@ -1,70 +0,0 @@ --------------- Test copy sorting clauses from source table -------------- -CREATE TABLE default.x -( - `CounterID` UInt32, - `EventDate` Date, - `UserID` UInt64 -) -ENGINE = MergeTree -PARTITION BY toYYYYMM(EventDate) -ORDER BY (CounterID, EventDate, intHash32(UserID)) -SAMPLE BY intHash32(UserID) -SETTINGS index_granularity = 8192 -------------------------------------------------------------------------- -CREATE TABLE default.x_as -( - `CounterID` UInt32, - `EventDate` Date, - `UserID` UInt64 -) -ENGINE = MergeTree -PARTITION BY toYYYYMM(EventDate) -ORDER BY (CounterID, EventDate, intHash32(UserID)) -SAMPLE BY intHash32(UserID) -SETTINGS enable_block_number_column = 1, enable_block_offset_column = 1, index_granularity = 8192 --------------- Test copy sorting clauses from destination table (source table without the same type clauses) -------------- -CREATE TABLE default.x -( - `CounterID` UInt32, - `EventDate` Date, - `UserID` UInt64 -) -ENGINE = MergeTree -PRIMARY KEY (CounterID, EventDate, intHash32(UserID)) -ORDER BY (CounterID, EventDate, intHash32(UserID)) -SETTINGS index_granularity = 8192 -------------------------------------------------------------------------- -CREATE TABLE default.x_as -( - `CounterID` UInt32, - `EventDate` Date, - `UserID` UInt64 -) -ENGINE = MergeTree -PARTITION BY toYYYYMM(EventDate) -PRIMARY KEY (CounterID, EventDate, intHash32(UserID)) -ORDER BY (CounterID, EventDate, intHash32(UserID)) -SAMPLE BY intHash32(UserID) -SETTINGS enable_block_number_column = 1, enable_block_offset_column = 1, index_granularity = 8192 --------------- Test copy sorting clauses from destination table (source table with the same type clauses) -------------- -CREATE TABLE default.x -( - `CounterID` UInt32, - 
`EventDate` Date, - `UserID` UInt64 -) -ENGINE = MergeTree -ORDER BY CounterID -SETTINGS index_granularity = 8192 -------------------------------------------------------------------------- -CREATE TABLE default.x_as -( - `CounterID` UInt32, - `EventDate` Date, - `UserID` UInt64 -) -ENGINE = MergeTree -PARTITION BY toYYYYMM(EventDate) -ORDER BY (CounterID, EventDate, intHash32(UserID)) -SAMPLE BY intHash32(UserID) -SETTINGS enable_block_number_column = 1, enable_block_offset_column = 1, index_granularity = 8192 diff --git a/tests/queries/0_stateless/01056_create_table_as_with_sorting_clauses.sql b/tests/queries/0_stateless/01056_create_table_as_with_sorting_clauses.sql deleted file mode 100644 index 96c2df54491..00000000000 --- a/tests/queries/0_stateless/01056_create_table_as_with_sorting_clauses.sql +++ /dev/null @@ -1,37 +0,0 @@ -DROP TABLE IF EXISTS x; -DROP TABLE IF EXISTS x_as; - -SELECT '-------------- Test copy sorting clauses from source table --------------'; -CREATE TABLE x (`CounterID` UInt32, `EventDate` Date, `UserID` UInt64) ENGINE = MergeTree PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID); -CREATE TABLE x_as AS x ENGINE = MergeTree SETTINGS enable_block_number_column = 1, enable_block_offset_column = 1; - -SHOW CREATE TABLE x FORMAT TSVRaw; -SELECT '-------------------------------------------------------------------------'; -SHOW CREATE TABLE x_as FORMAT TSVRaw; - -DROP TABLE x; -DROP TABLE x_as; - -SELECT '-------------- Test copy sorting clauses from destination table (source table without the same type clauses) --------------'; -CREATE TABLE x (`CounterID` UInt32, `EventDate` Date, `UserID` UInt64) ENGINE = MergeTree PRIMARY KEY (CounterID, EventDate, intHash32(UserID)); -CREATE TABLE x_as AS x ENGINE = MergeTree PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS enable_block_number_column = 1, 
enable_block_offset_column = 1; - -SHOW CREATE TABLE x FORMAT TSVRaw; -SELECT '-------------------------------------------------------------------------'; -SHOW CREATE TABLE x_as FORMAT TSVRaw; - -DROP TABLE x; -DROP TABLE x_as; - -SELECT '-------------- Test copy sorting clauses from destination table (source table with the same type clauses) --------------'; -CREATE TABLE x (`CounterID` UInt32, `EventDate` Date, `UserID` UInt64) ENGINE = MergeTree ORDER BY (CounterID); -CREATE TABLE x_as AS x ENGINE = MergeTree PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS enable_block_number_column = 1, enable_block_offset_column = 1; - -SHOW CREATE TABLE x FORMAT TSVRaw; -SELECT '-------------------------------------------------------------------------'; -SHOW CREATE TABLE x_as FORMAT TSVRaw; - -DROP TABLE x; -DROP TABLE x_as; - - From c10455ca036264ccbdfff1e9778697a3750d52c4 Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky <43110995+evillique@users.noreply.github.com> Date: Wed, 4 Sep 2024 19:36:16 +0200 Subject: [PATCH 17/20] Minor improvements for Lemmatizers --- src/Interpreters/Lemmatizers.cpp | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/src/Interpreters/Lemmatizers.cpp b/src/Interpreters/Lemmatizers.cpp index c24679de76e..26b57bfcf44 100644 --- a/src/Interpreters/Lemmatizers.cpp +++ b/src/Interpreters/Lemmatizers.cpp @@ -33,25 +33,16 @@ public: } }; -/// Duplicate of code from StringUtils.h. Copied here for less dependencies. 
-static bool startsWith(const std::string & s, const char * prefix) -{ - return s.size() >= strlen(prefix) && 0 == memcmp(s.data(), prefix, strlen(prefix)); -} - Lemmatizers::Lemmatizers(const Poco::Util::AbstractConfiguration & config) { - String prefix = "lemmatizers"; + const String prefix = "lemmatizers"; Poco::Util::AbstractConfiguration::Keys keys; - if (!config.has(prefix)) - throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "No lemmatizers specified in server config on prefix '{}'", prefix); - config.keys(prefix, keys); for (const auto & key : keys) { - if (startsWith(key, "lemmatizer")) + if (key.starts_with("lemmatizer")) { const auto & lemm_name = config.getString(prefix + "." + key + ".lang", ""); const auto & lemm_path = config.getString(prefix + "." + key + ".path", ""); @@ -81,13 +72,13 @@ Lemmatizers::LemmPtr Lemmatizers::getLemmatizer(const String & name) if (paths.find(name) != paths.end()) { if (!std::filesystem::exists(paths[name])) - throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Incorrect path to lemmatizer: {}", paths[name]); + throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Path to lemmatizer does not exist: {}", paths[name]); lemmatizers[name] = std::make_shared(paths[name]); return lemmatizers[name]; } - throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Lemmatizer named: '{}' is not found", name); + throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Lemmatizer with the name '{}' was not found in the configuration", name); } } From 89ca1c0759aeb46d9cd85483bfe2324c439be078 Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky <43110995+evillique@users.noreply.github.com> Date: Wed, 4 Sep 2024 20:08:27 +0200 Subject: [PATCH 18/20] Update Lemmatizers.cpp --- src/Interpreters/Lemmatizers.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/Interpreters/Lemmatizers.cpp b/src/Interpreters/Lemmatizers.cpp index 26b57bfcf44..05e91e7cea8 100644 --- a/src/Interpreters/Lemmatizers.cpp +++ 
b/src/Interpreters/Lemmatizers.cpp @@ -36,8 +36,10 @@ public: Lemmatizers::Lemmatizers(const Poco::Util::AbstractConfiguration & config) { const String prefix = "lemmatizers"; + if (!config.has(prefix)) + return; + Poco::Util::AbstractConfiguration::Keys keys; - config.keys(prefix, keys); for (const auto & key : keys) From 91447fabf7af2e9f6781ee3efc899acf4dfe0cae Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky <43110995+evillique@users.noreply.github.com> Date: Wed, 4 Sep 2024 20:17:59 +0200 Subject: [PATCH 19/20] Fix style --- src/Interpreters/Lemmatizers.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Interpreters/Lemmatizers.cpp b/src/Interpreters/Lemmatizers.cpp index 05e91e7cea8..c583108cf69 100644 --- a/src/Interpreters/Lemmatizers.cpp +++ b/src/Interpreters/Lemmatizers.cpp @@ -36,9 +36,10 @@ public: Lemmatizers::Lemmatizers(const Poco::Util::AbstractConfiguration & config) { const String prefix = "lemmatizers"; + if (!config.has(prefix)) return; - + Poco::Util::AbstractConfiguration::Keys keys; config.keys(prefix, keys); From 19b33e87eb94bc78e836ac54152f99cfe7eda717 Mon Sep 17 00:00:00 2001 From: Julia Kartseva Date: Wed, 4 Sep 2024 19:44:01 +0000 Subject: [PATCH 20/20] fix test --- .../0_stateless/03233_async_insert_infile_format.reference | 1 + tests/queries/0_stateless/03233_async_insert_infile_format.sh | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/03233_async_insert_infile_format.reference b/tests/queries/0_stateless/03233_async_insert_infile_format.reference index b236c1b8330..ae99738bcc5 100644 --- a/tests/queries/0_stateless/03233_async_insert_infile_format.reference +++ b/tests/queries/0_stateless/03233_async_insert_infile_format.reference @@ -1,2 +1,3 @@ 1 ClickHouse 2 HelloWorld +OK diff --git a/tests/queries/0_stateless/03233_async_insert_infile_format.sh b/tests/queries/0_stateless/03233_async_insert_infile_format.sh index ae8bf262833..2c167b42d51 100755 --- 
a/tests/queries/0_stateless/03233_async_insert_infile_format.sh +++ b/tests/queries/0_stateless/03233_async_insert_infile_format.sh @@ -20,6 +20,6 @@ ${CLICKHOUSE_CLIENT} --query "CREATE TABLE async_insert_infile_data (id UInt32, ${CLICKHOUSE_CLIENT} --query "INSERT INTO async_insert_infile_data FROM INFILE '${CLICKHOUSE_TMP}/test_infile.csv' SETTINGS async_insert=1;" ${CLICKHOUSE_CLIENT} --query "SELECT * FROM async_insert_infile_data ORDER BY id;" -${CLICKHOUSE_CLIENT} --query "INSERT INTO async_insert_infile_data FROM INFILE '${CLICKHOUSE_TMP}/test_infile.csv' FORMAT NotExists SETTINGS async_insert=1;" 2>&1 | grep -q "UNKNOWN_FORMAT" && echo OK || echo FAIL +${CLICKHOUSE_CLIENT} --query "INSERT INTO async_insert_infile_data FROM INFILE '${CLICKHOUSE_TMP}/test_infile.csv' SETTINGS async_insert=1 FORMAT NotExists;" 2>&1 | grep -q "UNKNOWN_FORMAT" && echo OK || echo FAIL ${CLICKHOUSE_CLIENT} --query "DROP TABLE async_insert_infile_data SYNC;"