From 5b3b11b517d79552c3956dc267bb16930113639d Mon Sep 17 00:00:00 2001 From: Constantine Peresypkin Date: Sun, 25 Sep 2022 19:58:00 +0200 Subject: [PATCH 1/5] add Morton Coding (ZCurve) --- .gitmodules | 3 + cmake/cpu_features.cmake | 16 + contrib/CMakeLists.txt | 1 + contrib/morton-nd | 1 + contrib/morton-nd-cmake/CMakeLists.txt | 3 + docker/test/fasttest/run.sh | 1 + src/Functions/CMakeLists.txt | 1 + src/Functions/mortonDecode.cpp | 433 ++++++++++++++++++ src/Functions/mortonEncode.cpp | 393 ++++++++++++++++ .../0_stateless/02457_morton_coding.reference | 12 + .../0_stateless/02457_morton_coding.sql | 137 ++++++ .../02457_morton_coding_with_mask.reference | 15 + .../02457_morton_coding_with_mask.sql | 143 ++++++ 13 files changed, 1159 insertions(+) create mode 160000 contrib/morton-nd create mode 100644 contrib/morton-nd-cmake/CMakeLists.txt create mode 100644 src/Functions/mortonDecode.cpp create mode 100644 src/Functions/mortonEncode.cpp create mode 100644 tests/queries/0_stateless/02457_morton_coding.reference create mode 100644 tests/queries/0_stateless/02457_morton_coding.sql create mode 100644 tests/queries/0_stateless/02457_morton_coding_with_mask.reference create mode 100644 tests/queries/0_stateless/02457_morton_coding_with_mask.sql diff --git a/.gitmodules b/.gitmodules index abd29c38846..293029ad171 100644 --- a/.gitmodules +++ b/.gitmodules @@ -287,3 +287,6 @@ [submodule "contrib/corrosion"] path = contrib/corrosion url = https://github.com/corrosion-rs/corrosion.git +[submodule "contrib/morton-nd"] + path = contrib/morton-nd + url = https://github.com/morton-nd/morton-nd diff --git a/cmake/cpu_features.cmake b/cmake/cpu_features.cmake index 6707d703372..aef31251a56 100644 --- a/cmake/cpu_features.cmake +++ b/cmake/cpu_features.cmake @@ -75,6 +75,7 @@ elseif (ARCH_AMD64) option (ENABLE_AVX512 "Use AVX512 instructions on x86_64" 0) option (ENABLE_AVX512_VBMI "Use AVX512_VBMI instruction on x86_64 (depends on ENABLE_AVX512)" 0) option (ENABLE_BMI "Use BMI instructions on x86_64" 0) + option (ENABLE_BMI2 "Use BMI2 instructions on x86_64 (depends on ENABLE_AVX2)" 0) option (ENABLE_AVX2_FOR_SPEC_OP "Use avx2 instructions for specific operations on x86_64" 0) option (ENABLE_AVX512_FOR_SPEC_OP "Use avx512 instructions for specific operations on x86_64" 0) @@ -90,6 +91,7 @@ elseif (ARCH_AMD64) SET(ENABLE_AVX512 0) SET(ENABLE_AVX512_VBMI 0) SET(ENABLE_BMI 0) + SET(ENABLE_BMI2 0) SET(ENABLE_AVX2_FOR_SPEC_OP 0) SET(ENABLE_AVX512_FOR_SPEC_OP 0) endif() @@ -237,6 +239,20 @@ elseif (ARCH_AMD64) set (COMPILER_FLAGS "${COMPILER_FLAGS} ${TEST_FLAG}") endif () + set (TEST_FLAG "-mbmi2") + set (CMAKE_REQUIRED_FLAGS "${TEST_FLAG} -O0") + check_cxx_source_compiles(" + #include + int main() { + auto a = _pdep_u64(0, 0); + (void)a; + return 0; + } + " HAVE_BMI2) + if (HAVE_BMI2 AND HAVE_AVX2 AND ENABLE_AVX2 AND ENABLE_BMI2) + set (COMPILER_FLAGS "${COMPILER_FLAGS} ${TEST_FLAG}") + endif () + # Limit avx2/avx512 flag for specific source build set (X86_INTRINSICS_FLAGS "") if (ENABLE_AVX2_FOR_SPEC_OP) diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index f914c0d2d3f..3017c1a8fc0 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -164,6 +164,7 @@ add_contrib (sqlite-cmake sqlite-amalgamation) add_contrib (s2geometry-cmake s2geometry) add_contrib (c-ares-cmake c-ares) add_contrib (qpl-cmake qpl) +add_contrib (morton-nd-cmake morton-nd) add_contrib(annoy-cmake annoy) diff --git a/contrib/morton-nd b/contrib/morton-nd new file mode 160000 index 00000000000..3795491a4aa --- /dev/null +++ b/contrib/morton-nd @@ -0,0 +1 @@ +Subproject commit 3795491a4aa3cdc916c8583094683f0d68df5bc0 diff --git a/contrib/morton-nd-cmake/CMakeLists.txt b/contrib/morton-nd-cmake/CMakeLists.txt new file mode 100644 index 00000000000..4842781503f --- /dev/null +++ b/contrib/morton-nd-cmake/CMakeLists.txt @@ -0,0 +1,3 @@ +add_library(_morton_nd INTERFACE) +target_include_directories(_morton_nd SYSTEM BEFORE INTERFACE "${ClickHouse_SOURCE_DIR}/contrib/morton-nd/include/") +add_library(ch_contrib::morton_nd ALIAS _morton_nd) diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh index 9d6cf22c817..de9125d565b 100755 --- a/docker/test/fasttest/run.sh +++ b/docker/test/fasttest/run.sh @@ -136,6 +136,7 @@ function clone_submodules contrib/wyhash contrib/hashidsxx contrib/c-ares + contrib/morton-nd ) git submodule sync diff --git a/src/Functions/CMakeLists.txt b/src/Functions/CMakeLists.txt index ad36c51447f..c84e23da85b 100644 --- a/src/Functions/CMakeLists.txt +++ b/src/Functions/CMakeLists.txt @@ -22,6 +22,7 @@ list (APPEND PUBLIC_LIBS ch_contrib::metrohash ch_contrib::murmurhash ch_contrib::hashidsxx + ch_contrib::morton_nd ) list (APPEND PRIVATE_LIBS diff --git a/src/Functions/mortonDecode.cpp b/src/Functions/mortonDecode.cpp new file mode 100644 index 00000000000..337fd5e3a38 --- /dev/null +++ b/src/Functions/mortonDecode.cpp @@ -0,0 +1,433 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#if USE_MULTITARGET_CODE && defined(__BMI2__) +#include +#endif + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int ILLEGAL_COLUMN; + extern const int ARGUMENT_OUT_OF_BOUND; +} + +#define EXTRACT_VECTOR(INDEX) \ + auto col##INDEX = ColumnUInt64::create(); \ + auto & vec##INDEX = col##INDEX->getData(); \ + vec##INDEX.resize(input_rows_count); + +#define DECODE(ND, ...) \ + if (nd == (ND)) \ + { \ + for (size_t i = 0; i < input_rows_count; i++) \ + { \ + auto res = MortonND_##ND##D_Dec.Decode(col_code->getUInt(i)); \ + __VA_ARGS__ \ + } \ + } + +#define MASK(IDX, ...) \ + ((mask) ? shrink(mask->getColumn((IDX)).getUInt(0), std::get(__VA_ARGS__)) : std::get(__VA_ARGS__)) + +#define EXECUTE() \ + size_t nd; \ + const auto * col_const = typeid_cast(arguments[0].column.get()); \ + const auto * mask = typeid_cast(col_const->getDataColumnPtr().get()); \ + if (mask) \ + nd = mask->tupleSize(); \ + else \ + nd = col_const->getUInt(0); \ + auto non_const_arguments = arguments; \ + non_const_arguments[1].column = non_const_arguments[1].column->convertToFullColumnIfConst(); \ + const ColumnPtr & col_code = non_const_arguments[1].column; \ + Columns tuple_columns(nd); \ + EXTRACT_VECTOR(0) \ + if (nd == 1) \ + { \ + if (mask) \ + { \ + for (size_t i = 0; i < input_rows_count; i++) \ + { \ + vec0[i] = shrink(mask->getColumn(0).getUInt(0), col_code->getUInt(i)); \ + } \ + tuple_columns[0] = std::move(col0); \ + } \ + else \ + { \ + for (size_t i = 0; i < input_rows_count; i++) \ + { \ + vec0[i] = col_code->getUInt(i); \ + } \ + tuple_columns[0] = std::move(col0); \ + } \ + return ColumnTuple::create(tuple_columns); \ + } \ + EXTRACT_VECTOR(1) \ + DECODE(2, \ + vec0[i] = MASK(0, res); \ + vec1[i] = MASK(1, res);) \ + EXTRACT_VECTOR(2) \ + DECODE(3, \ + vec0[i] = MASK(0, res); \ + vec1[i] = MASK(1, res); \ + vec2[i] = MASK(2, res);) \ + EXTRACT_VECTOR(3) \ + DECODE(4, \ + vec0[i] = MASK(0, res); \ + vec1[i] = MASK(1, res); \ + vec2[i] = MASK(2, res); \ + vec3[i] = MASK(3, res);) \ + EXTRACT_VECTOR(4) \ + DECODE(5, \ + vec0[i] = MASK(0, res); \ + vec1[i] = MASK(1, res); \ + vec2[i] = MASK(2, res); \ + vec3[i] = MASK(3, res); \ + vec4[i] = MASK(4, res);) \ + EXTRACT_VECTOR(5) \ + DECODE(6, \ + vec0[i] = MASK(0, res); \ + vec1[i] = MASK(1, res); \ + vec2[i] = MASK(2, res); \ + vec3[i] = MASK(3, res); \ + vec4[i] = MASK(4, res); \ + vec5[i] = MASK(5, res);) \ + EXTRACT_VECTOR(6) \ + DECODE(7, \ + vec0[i] = MASK(0, res); \ + vec1[i] = MASK(1, res); \ + vec2[i] = MASK(2, res); \ + vec3[i] = MASK(3, res); \ + vec4[i] = MASK(4, res); \ + vec5[i] = MASK(5, res); \ + vec6[i] = MASK(6, res);) \ + EXTRACT_VECTOR(7) \ + DECODE(8, \ + vec0[i] = MASK(0, res); \ + vec1[i] = MASK(1, res); \ + vec2[i] = MASK(2, res); \ + vec3[i] = MASK(3, res); \ + vec4[i] = MASK(4, res); \ + vec5[i] = MASK(5, res); \ + vec6[i] = MASK(6, res); \ + vec7[i] = MASK(7, res);) \ + switch (nd) \ + { \ + case 2: \ + tuple_columns[0] = std::move(col0); \ + tuple_columns[1] = std::move(col1); \ + break; \ + case 3: \ + tuple_columns[0] = std::move(col0); \ + tuple_columns[1] = std::move(col1); \ + tuple_columns[2] = std::move(col2); \ + return ColumnTuple::create(tuple_columns); \ + case 4: \ + tuple_columns[0] = std::move(col0); \ + tuple_columns[1] = std::move(col1); \ + tuple_columns[2] = std::move(col2); \ + tuple_columns[3] = std::move(col3); \ + return ColumnTuple::create(tuple_columns); \ + case 5: \ + tuple_columns[0] = std::move(col0); \ + tuple_columns[1] = std::move(col1); \ + tuple_columns[2] = std::move(col2); \ + tuple_columns[3] = std::move(col3); \ + tuple_columns[4] = std::move(col4); \ + return ColumnTuple::create(tuple_columns); \ + case 6: \ + tuple_columns[0] = std::move(col0); \ + tuple_columns[1] = std::move(col1); \ + tuple_columns[2] = std::move(col2); \ + tuple_columns[3] = std::move(col3); \ + tuple_columns[4] = std::move(col4); \ + tuple_columns[5] = std::move(col5); \ + return ColumnTuple::create(tuple_columns); \ + case 7: \ + tuple_columns[0] = std::move(col0); \ + tuple_columns[1] = std::move(col1); \ + tuple_columns[2] = std::move(col2); \ + tuple_columns[3] = std::move(col3); \ + tuple_columns[4] = std::move(col4); \ + tuple_columns[5] = std::move(col5); \ + tuple_columns[6] = std::move(col6); \ + return ColumnTuple::create(tuple_columns); \ + case 8: \ + tuple_columns[0] = std::move(col0); \ + tuple_columns[1] = std::move(col1); \ + tuple_columns[2] = std::move(col2); \ + tuple_columns[3] = std::move(col3); \ + tuple_columns[4] = std::move(col4); \ + tuple_columns[5] = std::move(col5); \ + tuple_columns[6] = std::move(col6); \ + tuple_columns[7] = std::move(col7); \ + return ColumnTuple::create(tuple_columns); \ + } \ + return ColumnTuple::create(tuple_columns); + +DECLARE_DEFAULT_CODE( +constexpr auto MortonND_2D_Dec = mortonnd::MortonNDLutDecoder<2, 32, 8>(); +constexpr auto MortonND_3D_Dec = mortonnd::MortonNDLutDecoder<3, 21, 8>(); +constexpr auto MortonND_4D_Dec = mortonnd::MortonNDLutDecoder<4, 16, 8>(); +constexpr auto MortonND_5D_Dec = mortonnd::MortonNDLutDecoder<5, 12, 8>(); +constexpr auto MortonND_6D_Dec = mortonnd::MortonNDLutDecoder<6, 10, 8>(); +constexpr auto MortonND_7D_Dec = mortonnd::MortonNDLutDecoder<7, 9, 8>(); +constexpr auto MortonND_8D_Dec = mortonnd::MortonNDLutDecoder<8, 8, 8>(); +class FunctionMortonDecode : public IFunction +{ +public: + static constexpr auto name = "mortonDecode"; + static FunctionPtr create(ContextPtr) + { + return std::make_shared(); + } + + String getName() const override + { + return name; + } + + size_t getNumberOfArguments() const override + { + return 2; + } + + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } + + ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {0}; } + + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override + { + UInt64 tuple_size = 0; + const auto * col_const = typeid_cast(arguments[0].column.get()); + if (!col_const) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, + "Illegal column type {} of function {}, should be a constant (UInt or Tuple)", + arguments[0].type->getName(), getName()); + if (!WhichDataType(arguments[1].type).isNativeUInt()) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, + "Illegal column type {} of function {}, should be a native UInt", + arguments[1].type->getName(), getName()); + const auto * mask = typeid_cast(col_const->getDataColumnPtr().get()); + if (mask) + { + tuple_size = mask->tupleSize(); + } + else if (WhichDataType(arguments[0].type).isNativeUInt()) + { + tuple_size = col_const->getUInt(0); + } + else + throw Exception(ErrorCodes::ILLEGAL_COLUMN, + "Illegal column type {} of function {}, should be UInt or Tuple", + arguments[0].type->getName(), getName()); + if (tuple_size > 8 || tuple_size < 1) + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, + "Illegal first argument for function {}, should be a number in range 1-8 or a Tuple of such size", + getName()); + if (mask) + { + const auto * type_tuple = typeid_cast(arguments[0].type.get()); + for (size_t i = 0; i < tuple_size; i++) + { + if (!WhichDataType(type_tuple->getElement(i)).isNativeUInt()) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of argument in tuple for function {}, should be a native UInt", + type_tuple->getElement(i)->getName(), getName()); + auto ratio = mask->getColumn(i).getUInt(0); + if (ratio > 8 || ratio < 1) + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, + "Illegal argument {} in tuple for function {}, should be a number in range 1-8", + ratio, getName()); + } + } + DataTypes types(tuple_size); + for (size_t i = 0; i < tuple_size; i++) + { + types[i] = std::make_shared(); + } + return std::make_shared(types); + } + + static UInt64 shrink(UInt64 ratio, UInt64 value) + { + switch (ratio) + { + case 1: + return value; + case 2: + return std::get<1>(MortonND_2D_Dec.Decode(value)); + case 3: + return std::get<2>(MortonND_3D_Dec.Decode(value)); + case 4: + return std::get<3>(MortonND_4D_Dec.Decode(value)); + case 5: + return std::get<4>(MortonND_5D_Dec.Decode(value)); + case 6: + return std::get<5>(MortonND_6D_Dec.Decode(value)); + case 7: + return std::get<6>(MortonND_7D_Dec.Decode(value)); + case 8: + return std::get<7>(MortonND_8D_Dec.Decode(value)); + } + return value; + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + { + EXECUTE() + } +}; +) // DECLARE_DEFAULT_CODE + +#if defined(MORTON_ND_BMI2_ENABLED) +#undef DECODE +#define DECODE(ND, ...) \ + if (nd == (ND)) \ + { \ + for (size_t i = 0; i < input_rows_count; i++) \ + { \ + auto res = MortonND_##ND##D::Decode(col_code->getUInt(i)); \ + __VA_ARGS__ \ + } \ + } + +DECLARE_AVX2_SPECIFIC_CODE( +using MortonND_2D = mortonnd::MortonNDBmi<2, uint64_t>; +using MortonND_3D = mortonnd::MortonNDBmi<3, uint64_t>; +using MortonND_4D = mortonnd::MortonNDBmi<4, uint64_t>; +using MortonND_5D = mortonnd::MortonNDBmi<5, uint64_t>; +using MortonND_6D = mortonnd::MortonNDBmi<6, uint64_t>; +using MortonND_7D = mortonnd::MortonNDBmi<7, uint64_t>; +using MortonND_8D = mortonnd::MortonNDBmi<8, uint64_t>; +class FunctionMortonDecode: public TargetSpecific::Default::FunctionMortonDecode +{ + static UInt64 shrink(UInt64 ratio, UInt64 value) + { + switch (ratio) + { + case 1: + return value; + case 2: + return std::get<1>(MortonND_2D::Decode(value)); + case 3: + return std::get<2>(MortonND_3D::Decode(value)); + case 4: + return std::get<3>(MortonND_4D::Decode(value)); + case 5: + return std::get<4>(MortonND_5D::Decode(value)); + case 6: + return std::get<5>(MortonND_6D::Decode(value)); + case 7: + return std::get<6>(MortonND_7D::Decode(value)); + case 8: + return std::get<7>(MortonND_8D::Decode(value)); + } + return value; + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + { + EXECUTE() + } +}; +) +#endif // MORTON_ND_BMI2_ENABLED + +#undef DECODE +#undef MASK +#undef EXTRACT_VECTOR +#undef EXECUTE + +class FunctionMortonDecode: public TargetSpecific::Default::FunctionMortonDecode +{ +public: + explicit FunctionMortonDecode(ContextPtr context) : selector(context) + { + selector.registerImplementation(); + +#if USE_MULTITARGET_CODE && defined(MORTON_ND_BMI2_ENABLED) + selector.registerImplementation(); +#endif + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override + { + return selector.selectAndExecute(arguments, result_type, input_rows_count); + } + + static FunctionPtr create(ContextPtr context) + { + return std::make_shared(context); + } + +private: + ImplementationSelector selector; +}; + +REGISTER_FUNCTION(MortonDecode) +{ + factory.registerFunction({ + R"( +Decodes a Morton encoding (ZCurve) into the corresponding unsigned integer tuple + +The function has two modes of operation: +- Simple +- Expanded + +Simple: accepts a resulting tuple size as a first argument and the code as a second argument. +[example:simple] +Will decode into: `(1,2,3,4)` +The resulting tuple size cannot be more than 8 + +Expanded: accepts a range mask (tuple) as a first argument and the code as a second argument. +Each number in mask configures the amount of range shrink +1 - no shrink +2 - 2x shrink +3 - 3x shrink +.... +Up to 8x shrink. +[example:range_shrank] +Note: see mortonEncode() docs on why range change might be beneficial. +Still limited to 8 numbers at most. + +Morton code for one argument is always the argument itself (as a tuple). +[example:identity] +Produces: `(1)` + +You can shrink one argument too: +[example:identity_shrank] +Produces: `(128)` + +The function accepts a column of codes as a second argument: +[example:from_table] + +The range tuple must be a constant: +[example:from_table_range] +)", + Documentation::Examples{ + {"simple", "SELECT mortonDecode(4, 2149)"}, + {"range_shrank", "SELECT mortonDecode((1,2), 1572864)"}, + {"identity", "SELECT mortonDecode(1, 1)"}, + {"identity_shrank", "SELECT mortonDecode(tuple(2), 32768)"}, + {"from_table", "SELECT mortonDecode(2, code) FROM table"}, + {"from_table_range", "SELECT mortonDecode((1,2), code) FROM table"}, + }, + Documentation::Categories {"ZCurve", "Morton coding"} + }); +} + +} diff --git a/src/Functions/mortonEncode.cpp b/src/Functions/mortonEncode.cpp new file mode 100644 index 00000000000..4bdd237fa9c --- /dev/null +++ b/src/Functions/mortonEncode.cpp @@ -0,0 +1,393 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#if USE_MULTITARGET_CODE && defined(__BMI2__) +#include +#endif + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int ARGUMENT_OUT_OF_BOUND; + extern const int TOO_FEW_ARGUMENTS_FOR_FUNCTION; +} + +#define EXTRACT_VECTOR(INDEX) \ + const ColumnPtr & col##INDEX = non_const_arguments[(INDEX) + vectorStartIndex].column; + +#define ENCODE(ND, ...) \ + if (nd == (ND)) \ + { \ + for (size_t i = 0; i < input_rows_count; i++) \ + { \ + vec_res[i] = MortonND_##ND##D_Enc.Encode(__VA_ARGS__); \ + } \ + return col_res; \ + } + +#define EXPAND(IDX, ...) \ + (mask) ? expand(mask->getColumn(IDX).getUInt(0), __VA_ARGS__) : __VA_ARGS__ + +#define MASK(ND, IDX, ...) \ + (EXPAND(IDX, __VA_ARGS__) & MortonND_##ND##D_Enc.InputMask()) + +#define EXECUTE() \ + size_t nd = arguments.size(); \ + size_t vectorStartIndex = 0; \ + const auto * const_col = typeid_cast(arguments[0].column.get()); \ + const ColumnTuple * mask; \ + if (const_col) \ + mask = typeid_cast(const_col->getDataColumnPtr().get()); \ + else \ + mask = typeid_cast(arguments[0].column.get()); \ + if (mask) \ + { \ + nd = mask->tupleSize(); \ + vectorStartIndex = 1; \ + for (size_t i = 0; i < nd; i++) \ + { \ + auto ratio = mask->getColumn(i).getUInt(0); \ + if (ratio > 8 || ratio < 1) \ + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, \ + "Illegal argument {} of function {}, should be a number in range 1-8", \ + arguments[0].column->getName(), getName()); \ + } \ + } \ + \ + auto non_const_arguments = arguments; \ + for (auto & argument : non_const_arguments) \ + argument.column = argument.column->convertToFullColumnIfConst(); \ + \ + auto col_res = ColumnUInt64::create(); \ + ColumnUInt64::Container & vec_res = col_res->getData(); \ + vec_res.resize(input_rows_count); \ + \ + EXTRACT_VECTOR(0) \ + if (nd == 1) \ + { \ + for (size_t i = 0; i < input_rows_count; i++) \ + { \ + vec_res[i] = EXPAND(0, col0->getUInt(i)); \ + } \ + return col_res; \ + } \ + \ + EXTRACT_VECTOR(1) \ + ENCODE(2, \ + MASK(2, 0, col0->getUInt(i)), \ + MASK(2, 1, col1->getUInt(i))) \ + EXTRACT_VECTOR(2) \ + ENCODE(3, \ + MASK(3, 0, col0->getUInt(i)), \ + MASK(3, 1, col1->getUInt(i)), \ + MASK(3, 2, col2->getUInt(i))) \ + EXTRACT_VECTOR(3) \ + ENCODE(4, \ + MASK(4, 0, col0->getUInt(i)), \ + MASK(4, 1, col1->getUInt(i)), \ + MASK(4, 2, col2->getUInt(i)), \ + MASK(4, 3, col3->getUInt(i))) \ + EXTRACT_VECTOR(4) \ + ENCODE(5, \ + MASK(5, 0, col0->getUInt(i)), \ + MASK(5, 1, col1->getUInt(i)), \ + MASK(5, 2, col2->getUInt(i)), \ + MASK(5, 3, col3->getUInt(i)), \ + MASK(5, 4, col4->getUInt(i))) \ + EXTRACT_VECTOR(5) \ + ENCODE(6, \ + MASK(6, 0, col0->getUInt(i)), \ + MASK(6, 1, col1->getUInt(i)), \ + MASK(6, 2, col2->getUInt(i)), \ + MASK(6, 3, col3->getUInt(i)), \ + MASK(6, 4, col4->getUInt(i)), \ + MASK(6, 5, col5->getUInt(i))) \ + EXTRACT_VECTOR(6) \ + ENCODE(7, \ + MASK(7, 0, col0->getUInt(i)), \ + MASK(7, 1, col1->getUInt(i)), \ + MASK(7, 2, col2->getUInt(i)), \ + MASK(7, 3, col3->getUInt(i)), \ + MASK(7, 4, col4->getUInt(i)), \ + MASK(7, 5, col5->getUInt(i)), \ + MASK(7, 6, col6->getUInt(i))) \ + EXTRACT_VECTOR(7) \ + ENCODE(8, \ + MASK(8, 0, col0->getUInt(i)), \ + MASK(8, 1, col1->getUInt(i)), \ + MASK(8, 2, col2->getUInt(i)), \ + MASK(8, 3, col3->getUInt(i)), \ + MASK(8, 4, col4->getUInt(i)), \ + MASK(8, 5, col5->getUInt(i)), \ + MASK(8, 6, col6->getUInt(i)), \ + MASK(8, 7, col7->getUInt(i))) \ + \ + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, \ + "Illegal number of UInt arguments of function {}, max: 8", \ + getName()); \ + +DECLARE_DEFAULT_CODE( +constexpr auto MortonND_2D_Enc = mortonnd::MortonNDLutEncoder<2, 32, 8>(); +constexpr auto MortonND_3D_Enc = mortonnd::MortonNDLutEncoder<3, 21, 8>(); +constexpr auto MortonND_4D_Enc = mortonnd::MortonNDLutEncoder<4, 16, 8>(); +constexpr auto MortonND_5D_Enc = mortonnd::MortonNDLutEncoder<5, 12, 8>(); +constexpr auto MortonND_6D_Enc = mortonnd::MortonNDLutEncoder<6, 10, 8>(); +constexpr auto MortonND_7D_Enc = mortonnd::MortonNDLutEncoder<7, 9, 8>(); +constexpr auto MortonND_8D_Enc = mortonnd::MortonNDLutEncoder<8, 8, 8>(); +class FunctionMortonEncode : public IFunction +{ +public: + static constexpr auto name = "mortonEncode"; + static FunctionPtr create(ContextPtr) + { + return std::make_shared(); + } + + String getName() const override + { + return name; + } + + bool isVariadic() const override + { + return true; + } + + size_t getNumberOfArguments() const override + { + return 0; + } + + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } + + bool useDefaultImplementationForConstants() const override { return true; } + + DataTypePtr getReturnTypeImpl(const DB::DataTypes & arguments) const override + { + size_t vectorStartIndex = 0; + if (arguments.empty()) + throw Exception(ErrorCodes::TOO_FEW_ARGUMENTS_FOR_FUNCTION, + "At least one UInt argument is required for function {}", + getName()); + if (WhichDataType(arguments[0]).isTuple()) + { + vectorStartIndex = 1; + const auto * type_tuple = typeid_cast(arguments[0].get()); + auto tuple_size = type_tuple->getElements().size(); + if (tuple_size != (arguments.size() - 1)) + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, + "Illegal argument {} for function {}, tuple size should be equal to number of UInt arguments", + arguments[0]->getName(), getName()); + for (size_t i = 0; i < tuple_size; i++) + { + if (!WhichDataType(type_tuple->getElement(i)).isNativeUInt()) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of argument in tuple for function {}, should be a native UInt", + type_tuple->getElement(i)->getName(), getName()); + } + } + + for (size_t i = vectorStartIndex; i < arguments.size(); i++) + { + const auto & arg = arguments[i]; + if (!WhichDataType(arg).isNativeUInt()) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of argument of function {}, should be a native UInt", + arg->getName(), getName()); + } + return std::make_shared(); + } + + static UInt64 expand(UInt64 ratio, UInt64 value) + { + switch (ratio) + { + case 1: + return value; + case 2: + return MortonND_2D_Enc.Encode(0, value & MortonND_2D_Enc.InputMask()); + case 3: + return MortonND_3D_Enc.Encode(0, 0, value & MortonND_3D_Enc.InputMask()); + case 4: + return MortonND_4D_Enc.Encode(0, 0, 0, value & MortonND_4D_Enc.InputMask()); + case 5: + return MortonND_5D_Enc.Encode(0, 0, 0, 0, value & MortonND_5D_Enc.InputMask()); + case 6: + return MortonND_6D_Enc.Encode(0, 0, 0, 0, 0, value & MortonND_6D_Enc.InputMask()); + case 7: + return MortonND_7D_Enc.Encode(0, 0, 0, 0, 0, 0, value & MortonND_7D_Enc.InputMask()); + case 8: + return MortonND_8D_Enc.Encode(0, 0, 0, 0, 0, 0, 0, value & MortonND_8D_Enc.InputMask()); + } + return value; + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + { + EXECUTE() + } +}; +) // DECLARE_DEFAULT_CODE + +#if defined(MORTON_ND_BMI2_ENABLED) +#undef ENCODE +#define ENCODE(ND, ...) \ + if (nd == (ND)) \ + { \ + for (size_t i = 0; i < input_rows_count; i++) \ + { \ + vec_res[i] = MortonND_##ND##D::Encode(__VA_ARGS__); \ + } \ + return col_res; \ + } + +#undef MASK +#define MASK(ND, IDX, ...) \ + (EXPAND(IDX, __VA_ARGS__)) + +DECLARE_AVX2_SPECIFIC_CODE( +using MortonND_2D = mortonnd::MortonNDBmi<2, uint64_t>; +using MortonND_3D = mortonnd::MortonNDBmi<3, uint64_t>; +using MortonND_4D = mortonnd::MortonNDBmi<4, uint64_t>; +using MortonND_5D = mortonnd::MortonNDBmi<5, uint64_t>; +using MortonND_6D = mortonnd::MortonNDBmi<6, uint64_t>; +using MortonND_7D = mortonnd::MortonNDBmi<7, uint64_t>; +using MortonND_8D = mortonnd::MortonNDBmi<8, uint64_t>; + +class FunctionMortonEncode : public TargetSpecific::Default::FunctionMortonEncode +{ +public: + static UInt64 expand(UInt64 ratio, UInt64 value) + { + switch (ratio) + { + case 1: + return value; + case 2: + return MortonND_2D::Encode(0, value); + case 3: + return MortonND_3D::Encode(0, 0, value); + case 4: + return MortonND_4D::Encode(0, 0, 0, value); + case 5: + return MortonND_5D::Encode(0, 0, 0, 0, value); + case 6: + return MortonND_6D::Encode(0, 0, 0, 0, 0, value); + case 7: + return MortonND_7D::Encode(0, 0, 0, 0, 0, 0, value); + case 8: + return MortonND_8D::Encode(0, 0, 0, 0, 0, 0, 0, value); + } + return value; + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + { + EXECUTE() + } +}; +) // DECLARE_AVX2_SPECIFIC_CODE +#endif // MORTON_ND_BMI2_ENABLED + +#undef ENCODE +#undef MASK +#undef EXTRACT_VECTOR +#undef EXPAND +#undef EXECUTE + +class FunctionMortonEncode: public TargetSpecific::Default::FunctionMortonEncode +{ +public: + explicit FunctionMortonEncode(ContextPtr context) : selector(context) + { + selector.registerImplementation(); + +#if USE_MULTITARGET_CODE && defined(MORTON_ND_BMI2_ENABLED) + selector.registerImplementation(); +#endif + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override + { + return selector.selectAndExecute(arguments, result_type, input_rows_count); + } + + static FunctionPtr create(ContextPtr context) + { + return std::make_shared(context); + } + +private: + ImplementationSelector selector; +}; + +REGISTER_FUNCTION(MortonEncode) +{ + factory.registerFunction({ + R"( +Calculates Morton encoding (ZCurve) for a list of unsigned integers + +The function has two modes of operation: +- Simple +- Expanded + +Simple: accepts up to 8 unsigned integers as arguments and produces a UInt64 code. +[example:simple] + +Expanded: accepts a range mask (tuple) as a first argument and up to 8 unsigned integers as other arguments. +Each number in mask configures the amount of range expansion +1 - no expansion +2 - 2x expansion +3 - 3x expansion +.... +Up to 8x expansion. +[example:range_expanded] +Note: tuple size must be equal to the number of the other arguments + +Range expansion can be beneficial when you need a similar distribution for arguments with wildly different ranges (or cardinality) +For example: 'IP Address' (0...FFFFFFFF) and 'Country code' (0...FF) + +Morton encoding for one argument is always the argument itself. +[example:identity] +Produces: `1` + +You can expand one argument too: +[example:identity_expanded] +Produces: `32768` + +The function also accepts columns as arguments: +[example:from_table] + +But the range tuple must still be a constant: +[example:from_table_range] + +Please note that you can fit only so much bits of information into Morton code as UInt64 has. +Two arguments will have a range of maximum 2^32 (64/2) each +Three arguments: range of max 2^21 (64/3) each +And so on, all overflow will be clamped to zero +)", + Documentation::Examples{ + {"simple", "SELECT mortonEncode(1, 2, 3)"}, + {"range_expanded", "SELECT mortonEncode((1,2), 1024, 16)"}, + {"identity", "SELECT mortonEncode(1)"}, + {"identity_expanded", "SELECT mortonEncode(tuple(2), 128)"}, + {"from_table", "SELECT mortonEncode(n1, n2) FROM table"}, + {"from_table_range", "SELECT mortonEncode((1,2), n1, n2) FROM table"}, + }, + Documentation::Categories {"ZCurve", "Morton coding"} + }); +} + +} diff --git a/tests/queries/0_stateless/02457_morton_coding.reference b/tests/queries/0_stateless/02457_morton_coding.reference new file mode 100644 index 00000000000..311a515a458 --- /dev/null +++ b/tests/queries/0_stateless/02457_morton_coding.reference @@ -0,0 +1,12 @@ +----- START ----- +----- CONST ----- +2149 +(1,2,3,4) +4294967286 +(65534,65533) +4294967286 +(4294967286) +----- 256, 8 ----- +----- 65536, 4 ----- +----- 4294967296, 2 ----- +----- END ----- diff --git a/tests/queries/0_stateless/02457_morton_coding.sql b/tests/queries/0_stateless/02457_morton_coding.sql new file mode 100644 index 00000000000..4fc26f255f4 --- /dev/null +++ b/tests/queries/0_stateless/02457_morton_coding.sql @@ -0,0 +1,137 @@ +SELECT '----- START -----'; +drop table if exists morton_numbers_02457; +create table morton_numbers_02457( + n1 UInt32, + n2 UInt32, + n3 UInt16, + n4 UInt16, + n5 UInt8, + n6 UInt8, + n7 UInt8, + n8 UInt8 +) + Engine=MergeTree() + ORDER BY n1; + +SELECT '----- CONST -----'; +select mortonEncode(1,2,3,4); +select mortonDecode(4, 2149); +select mortonEncode(65534, 65533); +select mortonDecode(2, 4294967286); +select mortonEncode(4294967286); +select mortonDecode(1, 4294967286); + +SELECT '----- 256, 8 -----'; +insert into morton_numbers_02457 +select n1.number, n2.number, n3.number, n4.number, n5.number, n6.number, n7.number, n8.number +from numbers(256-4, 4) n1 + cross join numbers(256-4, 4) n2 + cross join numbers(256-4, 4) n3 + cross join numbers(256-4, 4) n4 + cross join numbers(256-4, 4) n5 + cross join numbers(256-4, 4) n6 + cross join numbers(256-4, 4) n7 + cross join numbers(256-4, 4) n8 +; +drop table if exists morton_numbers_1_02457; +create table morton_numbers_1_02457( + n1 UInt64, + n2 UInt64, + n3 UInt64, + n4 UInt64, + n5 UInt64, + n6 UInt64, + n7 UInt64, + n8 UInt64 +) + Engine=MergeTree() + ORDER BY n1; + +insert into morton_numbers_1_02457 +select untuple(mortonDecode(8, mortonEncode(n1, n2, n3, n4, n5, n6, n7, n8))) +from morton_numbers_02457; + +( + select * from morton_numbers_02457 + union distinct + select * from morton_numbers_1_02457 +) +except +( + select * from morton_numbers_02457 + intersect + select * from morton_numbers_1_02457 +); +drop table if exists morton_numbers_1_02457; + +SELECT '----- 65536, 4 -----'; +insert into morton_numbers_02457 +select n1.number, n2.number, n3.number, n4.number, 0, 0, 0, 0 +from numbers(pow(2, 16)-8,8) n1 + cross join numbers(pow(2, 16)-8, 8) n2 + cross join numbers(pow(2, 16)-8, 8) n3 + cross join numbers(pow(2, 16)-8, 8) n4 +; + +create table morton_numbers_2_02457( + n1 UInt64, + n2 UInt64, + n3 UInt64, + n4 UInt64 +) + Engine=MergeTree() + ORDER BY n1; + +insert into morton_numbers_2_02457 +select untuple(mortonDecode(4, mortonEncode(n1, n2, n3, n4))) +from morton_numbers_02457; + +( + select n1, n2, n3, n4 from morton_numbers_02457 + union distinct + select n1, n2, n3, n4 from morton_numbers_2_02457 +) +except +( + select n1, n2, n3, n4 from morton_numbers_02457 + intersect + select n1, n2, n3, n4 from morton_numbers_2_02457 +); +drop table if exists morton_numbers_2_02457; + +SELECT '----- 4294967296, 2 -----'; +insert into morton_numbers_02457 +select n1.number, n2.number, 0, 0, 0, 0, 0, 0 +from numbers(pow(2, 32)-8,8) n1 + cross join numbers(pow(2, 32)-8, 8) n2 + cross join numbers(pow(2, 32)-8, 8) n3 + cross join numbers(pow(2, 32)-8, 8) n4 +; + +drop table if exists morton_numbers_3_02457; +create table morton_numbers_3_02457( + n1 UInt64, + n2 UInt64 +) + Engine=MergeTree() + ORDER BY n1; + +insert into morton_numbers_3_02457 +select untuple(mortonDecode(2, mortonEncode(n1, n2))) +from morton_numbers_02457; + +( + select n1, n2 from morton_numbers_3_02457 + union distinct + select n1, n2 from morton_numbers_3_02457 +) +except +( + select n1, n2 from morton_numbers_3_02457 + intersect + select n1, n2 from morton_numbers_3_02457 +); +drop table if exists morton_numbers_3_02457; + +SELECT '----- END -----'; +drop table if exists morton_numbers_02457; diff --git a/tests/queries/0_stateless/02457_morton_coding_with_mask.reference b/tests/queries/0_stateless/02457_morton_coding_with_mask.reference new file mode 100644 index 00000000000..32d5ce3ee27 --- /dev/null +++ b/tests/queries/0_stateless/02457_morton_coding_with_mask.reference @@ -0,0 +1,15 @@ +----- START ----- +----- CONST ----- +4205569 +(1,2,3,4) +4294967286 +(65534,65533) +4294967286 +(4294967286) +2147483648 +(128) +0 +----- (1,2,1,2) ----- +----- (1,4) ----- +----- (1,1,2) ----- +----- END ----- diff --git a/tests/queries/0_stateless/02457_morton_coding_with_mask.sql b/tests/queries/0_stateless/02457_morton_coding_with_mask.sql new file mode 100644 index 00000000000..5aeb1f380be --- /dev/null +++ b/tests/queries/0_stateless/02457_morton_coding_with_mask.sql @@ -0,0 +1,143 @@ +SELECT '----- START -----'; + +SELECT '----- CONST -----'; +select mortonEncode((1,2,3,1), 1,2,3,4); +select mortonDecode((1, 2, 3, 1), 4205569); +select mortonEncode((1,1), 65534, 65533); +select mortonDecode((1,1), 4294967286); +select mortonEncode(tuple(1), 4294967286); +select mortonDecode(tuple(1), 4294967286); +select mortonEncode(tuple(4), 128); +select mortonDecode(tuple(4), 2147483648); +select mortonEncode((4,4,4,4), 128, 128, 128, 128); + +SELECT '----- (1,2,1,2) -----'; +drop table if exists morton_numbers_mask_02457; +create table morton_numbers_mask_02457( + n1 UInt8, + n2 UInt8, + n3 UInt8, + n4 UInt8 +) + Engine=MergeTree() + ORDER BY n1; + +insert into morton_numbers_mask_02457 +select n1.number, n2.number, n3.number, n4.number +from numbers(256-16, 16) n1 + cross join numbers(256-16, 16) n2 + cross join numbers(256-16, 16) n3 + cross join numbers(256-16, 16) n4 +; +drop table if exists morton_numbers_mask_1_02457; +create table morton_numbers_mask_1_02457( + n1 UInt64, + n2 UInt64, + n3 UInt64, + n4 UInt64 +) + Engine=MergeTree() + ORDER BY n1; + +insert into morton_numbers_mask_1_02457 +select untuple(mortonDecode((1,2,1,2), mortonEncode((1,2,1,2), n1, n2, n3, n4))) +from morton_numbers_mask_02457; + +( + select * from morton_numbers_mask_02457 + union distinct + select * from morton_numbers_mask_1_02457 +) +except +( + select * from morton_numbers_mask_02457 + intersect + select * from morton_numbers_mask_1_02457 +); +drop table if exists morton_numbers_mask_02457; +drop table if exists morton_numbers_mask_1_02457; + +SELECT '----- (1,4) -----'; +drop table if exists morton_numbers_mask_02457; +create table morton_numbers_mask_02457( + n1 UInt32, + n2 UInt8 +) + Engine=MergeTree() + ORDER BY n1; + +insert into morton_numbers_mask_02457 +select n1.number, n2.number +from numbers(pow(2, 32)-64, 64) n1 + cross join numbers(pow(2, 8)-64, 64) n2 +; +drop table if exists morton_numbers_mask_2_02457; +create table morton_numbers_mask_2_02457( + n1 UInt64, + n2 UInt64 +) + Engine=MergeTree() + ORDER BY n1; + +insert into morton_numbers_mask_2_02457 +select untuple(mortonDecode((1,4), mortonEncode((1,4), n1, n2))) +from morton_numbers_mask_02457; + +( + select * from morton_numbers_mask_02457 + union distinct + select * from morton_numbers_mask_2_02457 +) +except +( + select * from morton_numbers_mask_02457 + intersect + select * from morton_numbers_mask_2_02457 +); +drop table if exists morton_numbers_mask_02457; +drop table if exists morton_numbers_mask_2_02457; + +SELECT '----- (1,1,2) -----'; +drop table if exists morton_numbers_mask_02457; +create table morton_numbers_mask_02457( + n1 UInt16, + n2 UInt16, + n3 UInt8, +) + Engine=MergeTree() + ORDER BY n1; + +insert into morton_numbers_mask_02457 +select n1.number, n2.number, n3.number +from numbers(pow(2, 16)-64, 64) n1 + cross join numbers(pow(2, 16)-64, 64) n2 + cross join numbers(pow(2, 8)-64, 64) n3 +; +drop table if exists morton_numbers_mask_3_02457; +create table morton_numbers_mask_3_02457( + n1 UInt64, + n2 UInt64, + n3 UInt64 +) + Engine=MergeTree() + ORDER BY n1; + +insert into morton_numbers_mask_3_02457 +select untuple(mortonDecode((1,1,2), mortonEncode((1,1,2), n1, n2, n3))) +from morton_numbers_mask_02457; + +( + select * from morton_numbers_mask_02457 + union distinct + select * from morton_numbers_mask_3_02457 +) +except +( + select * from morton_numbers_mask_02457 + intersect + select * from morton_numbers_mask_3_02457 +); +drop table if exists morton_numbers_mask_02457; +drop table if exists morton_numbers_mask_3_02457; + +SELECT '----- END -----'; From 27494aea6366655881f198d06ef9246f1755d7d1 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sun, 16 Oct 2022 23:31:03 +0200 Subject: [PATCH 2/5] Make getResource() independent from the order of the sections It is possible for end section goes before begin section for some resource, and this case it will not find it. This is what happens here [1]: [1]: https://s3.amazonaws.com/clickhouse-test-reports/41988/053fec3f451e97ac41b6c223d95013b758a9a330/fast_test.html Signed-off-by: Azat Khuzhin --- src/Common/SymbolIndex.cpp | 24 +++++++++++++----------- src/Common/SymbolIndex.h | 13 ++++++++++--- 2 files changed, 23 insertions(+), 14 deletions(-) diff --git a/src/Common/SymbolIndex.cpp b/src/Common/SymbolIndex.cpp index e217d23cc27..6f31009b1d2 100644 --- a/src/Common/SymbolIndex.cpp +++ b/src/Common/SymbolIndex.cpp @@ -99,23 +99,25 @@ void updateResources(ElfW(Addr) base_address, std::string_view object_name, std: name = name.substr((name[0] == '_') + strlen("binary_")); name = name.substr(0, name.size() - strlen("_start")); - resources.emplace(name, SymbolIndex::ResourcesBlob{ - base_address, - object_name, - std::string_view{char_address, 0}, // NOLINT - }); + auto & resource = resources[name]; + if (!resource.base_address || resource.base_address == base_address) + { + resource.base_address = base_address; + resource.start = std::string_view{char_address, 0}; // NOLINT(bugprone-string-constructor) + resource.object_name = object_name; + } } - else if (name.ends_with("_end")) + if (name.ends_with("_end")) { name = name.substr((name[0] == '_') + strlen("binary_")); name = name.substr(0, name.size() - strlen("_end")); - auto it = resources.find(name); - if (it != resources.end() && it->second.base_address == base_address && it->second.data.empty()) + auto & resource = resources[name]; + if (!resource.base_address || resource.base_address == base_address) { - const char * start = it->second.data.data(); - assert(char_address >= start); - it->second.data = std::string_view{start, static_cast(char_address - start)}; + resource.base_address = base_address; + resource.end = std::string_view{char_address, 0}; // NOLINT(bugprone-string-constructor) + resource.object_name = object_name; } } } diff --git a/src/Common/SymbolIndex.h b/src/Common/SymbolIndex.h index f2b40f02ead..47162331946 100644 --- a/src/Common/SymbolIndex.h +++ b/src/Common/SymbolIndex.h @@ -51,7 +51,7 @@ public: std::string_view getResource(String name) const { if (auto it = data.resources.find(name); it != data.resources.end()) - return it->second.data; + return it->second.data(); return {}; } @@ -63,11 +63,18 @@ public: { /// Symbol can be presented in multiple shared objects, /// base_address will be used to compare only symbols from the same SO. - ElfW(Addr) base_address; + ElfW(Addr) base_address = 0; /// Just a human name of the SO. std::string_view object_name; /// Data blob. - std::string_view data; + std::string_view start; + std::string_view end; + + std::string_view data() const + { + assert(end.data() >= start.data()); + return std::string_view{start.data(), static_cast(end.data() - start.data())}; + } }; using Resources = std::unordered_map; From 655233e1c5a424480f271926f08a69359ad5b7c8 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 19 Oct 2022 09:13:02 +0000 Subject: [PATCH 3/5] Add convenience typedefs for Date/Date32/DateTime/DateTime64 columns --- src/Columns/ColumnsDateTime.h | 20 +++++++++++++++++ src/Functions/FunctionsTimeWindow.cpp | 13 ++++++----- src/Functions/dateDiff.cpp | 32 +++++++++++++-------------- src/Functions/makeDate.cpp | 9 ++++---- src/Functions/nowInBlock.cpp | 4 ++-- src/Functions/timeSlots.cpp | 13 ++++++----- src/Functions/toStartOfInterval.cpp | 9 ++++---- 7 files changed, 62 insertions(+), 38 deletions(-) create mode 100644 src/Columns/ColumnsDateTime.h diff --git a/src/Columns/ColumnsDateTime.h b/src/Columns/ColumnsDateTime.h new file mode 100644 index 00000000000..90d21ed5ff7 --- /dev/null +++ b/src/Columns/ColumnsDateTime.h @@ -0,0 +1,20 @@ +#pragma once + +#include +#include +#include +#include +#include + + +namespace DB +{ + +/** Convenience typedefs for columns of SQL types Date, Date32, DateTime and DateTime64. */ + +using ColumnDate = DataTypeDate::ColumnType; +using ColumnDate32 = DataTypeDate32::ColumnType; +using ColumnDateTime = DataTypeDateTime::ColumnType; +using ColumnDateTime64 = DataTypeDateTime64::ColumnType; + +} diff --git a/src/Functions/FunctionsTimeWindow.cpp b/src/Functions/FunctionsTimeWindow.cpp index 286ed4a729d..a47fc71c335 100644 --- a/src/Functions/FunctionsTimeWindow.cpp +++ b/src/Functions/FunctionsTimeWindow.cpp @@ -1,6 +1,7 @@ #include #include +#include #include #include #include @@ -157,7 +158,7 @@ struct TimeWindowImpl const auto & interval_column = arguments[1]; const auto & from_datatype = *time_column.type.get(); const auto which_type = WhichDataType(from_datatype); - const auto * time_column_vec = checkAndGetColumn(time_column.column.get()); + const auto * time_column_vec = checkAndGetColumn(time_column.column.get()); const DateLUTImpl & time_zone = extractTimeZoneFromFunctionArguments(arguments, 2, 0); if (!which_type.isDateTime() || !time_column_vec) throw Exception( @@ -198,7 +199,7 @@ struct TimeWindowImpl } template - static ColumnPtr executeTumble(const ColumnUInt32 & time_column, UInt64 num_units, const DateLUTImpl & time_zone) + static ColumnPtr executeTumble(const ColumnDateTime & time_column, UInt64 num_units, const DateLUTImpl & time_zone) { const auto & time_data = time_column.getData(); size_t size = time_column.size(); @@ -342,7 +343,7 @@ struct TimeWindowImpl const auto & hop_interval_column = arguments[1]; const auto & window_interval_column = arguments[2]; const auto & from_datatype = *time_column.type.get(); - const auto * time_column_vec = checkAndGetColumn(time_column.column.get()); + const auto * time_column_vec = checkAndGetColumn(time_column.column.get()); const DateLUTImpl & time_zone = extractTimeZoneFromFunctionArguments(arguments, 3, 0); if (!WhichDataType(from_datatype).isDateTime() || !time_column_vec) throw Exception( @@ -402,7 +403,7 @@ struct TimeWindowImpl template static ColumnPtr - executeHop(const ColumnUInt32 & time_column, UInt64 hop_num_units, UInt64 window_num_units, const DateLUTImpl & time_zone) + executeHop(const ColumnDateTime & time_column, UInt64 hop_num_units, UInt64 window_num_units, const DateLUTImpl & time_zone) { const auto & time_data = time_column.getData(); size_t size = time_column.size(); @@ -491,7 +492,7 @@ struct TimeWindowImpl const auto & hop_interval_column = arguments[1]; const auto & window_interval_column = arguments[2]; const auto & from_datatype = *time_column.type.get(); - const auto * time_column_vec = checkAndGetColumn(time_column.column.get()); + const auto * time_column_vec = checkAndGetColumn(time_column.column.get()); const DateLUTImpl & time_zone = extractTimeZoneFromFunctionArguments(arguments, 3, 0); if (!WhichDataType(from_datatype).isDateTime() || !time_column_vec) throw Exception( @@ -551,7 +552,7 @@ struct TimeWindowImpl template static ColumnPtr - executeHopSlice(const ColumnUInt32 & time_column, UInt64 hop_num_units, UInt64 window_num_units, const DateLUTImpl & time_zone) + executeHopSlice(const ColumnDateTime & time_column, UInt64 hop_num_units, UInt64 window_num_units, const DateLUTImpl & time_zone) { Int64 gcd_num_units = std::gcd(hop_num_units, window_num_units); diff --git a/src/Functions/dateDiff.cpp b/src/Functions/dateDiff.cpp index b33fcf32de1..f5a4b50fb54 100644 --- a/src/Functions/dateDiff.cpp +++ b/src/Functions/dateDiff.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include #include @@ -44,7 +45,6 @@ namespace */ class FunctionDateDiff : public IFunction { - using ColumnDateTime64 = ColumnDecimal; public: static constexpr auto name = "dateDiff"; static FunctionPtr create(ContextPtr) { return std::make_shared(); } @@ -141,19 +141,19 @@ private: const DateLUTImpl & timezone_x, const DateLUTImpl & timezone_y, ColumnInt64::Container & result) const { - if (const auto * x_vec_16 = checkAndGetColumn(&x)) + if (const auto * x_vec_16 = checkAndGetColumn(&x)) dispatchForSecondColumn(*x_vec_16, y, timezone_x, timezone_y, result); - else if (const auto * x_vec_32 = checkAndGetColumn(&x)) + else if (const auto * x_vec_32 = checkAndGetColumn(&x)) dispatchForSecondColumn(*x_vec_32, y, timezone_x, timezone_y, result); - else if (const auto * x_vec_32_s = checkAndGetColumn(&x)) + else if (const auto * x_vec_32_s = checkAndGetColumn(&x)) dispatchForSecondColumn(*x_vec_32_s, y, timezone_x, timezone_y, result); else if (const auto * x_vec_64 = checkAndGetColumn(&x)) dispatchForSecondColumn(*x_vec_64, y, timezone_x, timezone_y, result); - else if (const auto * x_const_16 = checkAndGetColumnConst(&x)) + else if (const auto * x_const_16 = checkAndGetColumnConst(&x)) dispatchConstForSecondColumn(x_const_16->getValue(), y, timezone_x, timezone_y, result); - else if (const auto * x_const_32 = checkAndGetColumnConst(&x)) + else if (const auto * x_const_32 = checkAndGetColumnConst(&x)) dispatchConstForSecondColumn(x_const_32->getValue(), y, timezone_x, timezone_y, result); - else if (const auto * x_const_32_s = checkAndGetColumnConst(&x)) + else if (const auto * x_const_32_s = checkAndGetColumnConst(&x)) dispatchConstForSecondColumn(x_const_32_s->getValue(), y, timezone_x, timezone_y, result); else if (const auto * x_const_64 = checkAndGetColumnConst(&x)) dispatchConstForSecondColumn(x_const_64->getValue>(), y, timezone_x, timezone_y, result); @@ -169,19 +169,19 @@ private: const DateLUTImpl & timezone_x, const DateLUTImpl & timezone_y, ColumnInt64::Container & result) const { - if (const auto * y_vec_16 = checkAndGetColumn(&y)) + if (const auto * y_vec_16 = checkAndGetColumn(&y)) vectorVector(x, *y_vec_16, timezone_x, timezone_y, result); - else if (const auto * y_vec_32 = checkAndGetColumn(&y)) + else if (const auto * y_vec_32 = checkAndGetColumn(&y)) vectorVector(x, *y_vec_32, timezone_x, timezone_y, result); - else if (const auto * y_vec_32_s = checkAndGetColumn(&y)) + else if (const auto * y_vec_32_s = checkAndGetColumn(&y)) vectorVector(x, *y_vec_32_s, timezone_x, timezone_y, result); else if (const auto * y_vec_64 = checkAndGetColumn(&y)) vectorVector(x, *y_vec_64, timezone_x, timezone_y, result); - else if (const auto * y_const_16 = checkAndGetColumnConst(&y)) + else if (const auto * y_const_16 = checkAndGetColumnConst(&y)) vectorConstant(x, y_const_16->getValue(), timezone_x, timezone_y, result); - else if (const auto * y_const_32 = checkAndGetColumnConst(&y)) + else if (const auto * y_const_32 = checkAndGetColumnConst(&y)) vectorConstant(x, y_const_32->getValue(), timezone_x, timezone_y, result); - else if (const auto * y_const_32_s = checkAndGetColumnConst(&y)) + else if (const auto * y_const_32_s = checkAndGetColumnConst(&y)) vectorConstant(x, y_const_32_s->getValue(), timezone_x, timezone_y, result); else if (const auto * y_const_64 = checkAndGetColumnConst(&y)) vectorConstant(x, y_const_64->getValue>(), timezone_x, timezone_y, result); @@ -197,11 +197,11 @@ private: const DateLUTImpl & timezone_x, const DateLUTImpl & timezone_y, ColumnInt64::Container & result) const { - if (const auto * y_vec_16 = checkAndGetColumn(&y)) + if (const auto * y_vec_16 = checkAndGetColumn(&y)) constantVector(x, *y_vec_16, timezone_x, timezone_y, result); - else if (const auto * y_vec_32 = checkAndGetColumn(&y)) + else if (const auto * y_vec_32 = checkAndGetColumn(&y)) constantVector(x, *y_vec_32, timezone_x, timezone_y, result); - else if (const auto * y_vec_32_s = checkAndGetColumn(&y)) + else if (const auto * y_vec_32_s = checkAndGetColumn(&y)) constantVector(x, *y_vec_32_s, timezone_x, timezone_y, result); else if (const auto * y_vec_64 = checkAndGetColumn(&y)) constantVector(x, *y_vec_64, timezone_x, timezone_y, result); diff --git a/src/Functions/makeDate.cpp b/src/Functions/makeDate.cpp index c9571a7333d..7ebca71af13 100644 --- a/src/Functions/makeDate.cpp +++ b/src/Functions/makeDate.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include #include @@ -149,7 +150,7 @@ struct MakeDateTraits { static constexpr auto name = "makeDate"; using ReturnDataType = DataTypeDate; - using ReturnColumnType = ColumnUInt16; + using ReturnColumnType = ColumnDate; static constexpr auto MIN_YEAR = 1970; static constexpr auto MAX_YEAR = 2149; @@ -162,7 +163,7 @@ struct MakeDate32Traits { static constexpr auto name = "makeDate32"; using ReturnDataType = DataTypeDate32; - using ReturnColumnType = ColumnInt32; + using ReturnColumnType = ColumnDate32; static constexpr auto MIN_YEAR = 1900; static constexpr auto MAX_YEAR = 2299; @@ -267,7 +268,7 @@ public: Columns converted_arguments; convertRequiredArguments(arguments, converted_arguments); - auto res_column = ColumnUInt32::create(input_rows_count); + auto res_column = ColumnDateTime::create(input_rows_count); auto & result_data = res_column->getData(); const auto & year_data = typeid_cast(*converted_arguments[0]).getData(); @@ -365,7 +366,7 @@ public: fraction_data = &typeid_cast(*converted_arguments[6]).getData(); } - auto res_column = ColumnDecimal::create(input_rows_count, static_cast(precision)); + auto res_column = ColumnDateTime64::create(input_rows_count, static_cast(precision)); auto & result_data = res_column->getData(); const auto & year_data = typeid_cast(*converted_arguments[0]).getData(); diff --git a/src/Functions/nowInBlock.cpp b/src/Functions/nowInBlock.cpp index c771d83225a..b1764590fda 100644 --- a/src/Functions/nowInBlock.cpp +++ b/src/Functions/nowInBlock.cpp @@ -2,7 +2,7 @@ #include #include #include -#include +#include namespace DB @@ -74,7 +74,7 @@ public: ColumnPtr executeImpl(const ColumnsWithTypeAndName &, const DataTypePtr &, size_t input_rows_count) const override { - return ColumnUInt32::create(input_rows_count, static_cast(time(nullptr))); + return ColumnDateTime::create(input_rows_count, static_cast(time(nullptr))); } }; diff --git a/src/Functions/timeSlots.cpp b/src/Functions/timeSlots.cpp index e986e32d76f..72d6059e0a1 100644 --- a/src/Functions/timeSlots.cpp +++ b/src/Functions/timeSlots.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include @@ -300,11 +301,11 @@ public: throw Exception("Third argument for function " + getName() + " must be greater than zero", ErrorCodes::ILLEGAL_COLUMN); } - const auto * dt_starts = checkAndGetColumn(arguments[0].column.get()); - const auto * dt_const_starts = checkAndGetColumnConst(arguments[0].column.get()); + const auto * dt_starts = checkAndGetColumn(arguments[0].column.get()); + const auto * dt_const_starts = checkAndGetColumnConst(arguments[0].column.get()); - const auto * durations = checkAndGetColumn(arguments[1].column.get()); - const auto * const_durations = checkAndGetColumnConst(arguments[1].column.get()); + const auto * durations = checkAndGetColumn(arguments[1].column.get()); + const auto * const_durations = checkAndGetColumnConst(arguments[1].column.get()); auto res = ColumnArray::create(ColumnUInt32::create()); ColumnUInt32::Container & res_values = typeid_cast(res->getData()).getData(); @@ -341,8 +342,8 @@ public: time_slot_scale = assert_cast(arguments[2].type.get())->getScale(); } - const auto * starts = checkAndGetColumn(arguments[0].column.get()); - const auto * const_starts = checkAndGetColumnConst(arguments[0].column.get()); + const auto * starts = checkAndGetColumn(arguments[0].column.get()); + const auto * const_starts = checkAndGetColumnConst(arguments[0].column.get()); const auto * durations = checkAndGetColumn>(arguments[1].column.get()); const auto * const_durations = checkAndGetColumnConst>(arguments[1].column.get()); diff --git a/src/Functions/toStartOfInterval.cpp b/src/Functions/toStartOfInterval.cpp index 32fe574f56a..3054cf280d9 100644 --- a/src/Functions/toStartOfInterval.cpp +++ b/src/Functions/toStartOfInterval.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include #include @@ -437,7 +438,7 @@ private: if (which_type.isDateTime64()) { - const auto * time_column_vec = checkAndGetColumn(time_column.column.get()); + const auto * time_column_vec = checkAndGetColumn(time_column.column.get()); auto scale = assert_cast(from_datatype).getScale(); if (time_column_vec) @@ -445,19 +446,19 @@ private: } if (which_type.isDateTime()) { - const auto * time_column_vec = checkAndGetColumn(time_column.column.get()); + const auto * time_column_vec = checkAndGetColumn(time_column.column.get()); if (time_column_vec) return dispatchForIntervalColumn(assert_cast(from_datatype), *time_column_vec, interval_column, result_type, time_zone); } if (which_type.isDate()) { - const auto * time_column_vec = checkAndGetColumn(time_column.column.get()); + const auto * time_column_vec = checkAndGetColumn(time_column.column.get()); if (time_column_vec) return dispatchForIntervalColumn(assert_cast(from_datatype), *time_column_vec, interval_column, result_type, time_zone); } if (which_type.isDate32()) { - const auto * time_column_vec = checkAndGetColumn(time_column.column.get()); + const auto * time_column_vec = checkAndGetColumn(time_column.column.get()); if (time_column_vec) return dispatchForIntervalColumn(assert_cast(from_datatype), *time_column_vec, interval_column, result_type, time_zone); } From 787c25243798d3a23a5779b3cf82d1ce4a4c84b9 Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Mon, 24 Oct 2022 21:07:52 +0200 Subject: [PATCH 4/5] Add a template for installation issues --- .../ISSUE_TEMPLATE/96_installation-issues.md | 29 +++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 .github/ISSUE_TEMPLATE/96_installation-issues.md diff --git a/.github/ISSUE_TEMPLATE/96_installation-issues.md b/.github/ISSUE_TEMPLATE/96_installation-issues.md new file mode 100644 index 00000000000..c322ccc92ce --- /dev/null +++ b/.github/ISSUE_TEMPLATE/96_installation-issues.md @@ -0,0 +1,29 @@ +--- +name: Installation issue +about: Issue with ClickHouse installation from https://clickhouse.com/docs/en/install/ +title: '' +labels: comp-install +assignees: '' + +--- + +**Installation type** + +Packages, docker, single binary, curl? + +**Source of the ClickHouse** + +A link to the source. Or the command you've tried + +**Expected result** + +What you expected + +**The actual result** + +What you get + +**How to reproduce** + +* For Linux-based operating systems: provide a script for clear docker container from the official image +* For anything else: steps to reproduce on as much as possible clear system From e30d425869ae332d3b1c2ddf3f85866355d4e27c Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Tue, 25 Oct 2022 01:55:02 +0000 Subject: [PATCH 5/5] fix build --- src/Columns/tests/gtest_column_vector.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/Columns/tests/gtest_column_vector.cpp b/src/Columns/tests/gtest_column_vector.cpp index c5f0b3bd50a..5017d687791 100644 --- a/src/Columns/tests/gtest_column_vector.cpp +++ b/src/Columns/tests/gtest_column_vector.cpp @@ -21,9 +21,7 @@ static MutableColumnPtr createColumn(size_t n) auto & values = column->getData(); for (size_t i = 0; i < n; ++i) - { - values.push_back(i); - } + values.push_back(static_cast(i)); return column; }