This commit is contained in:
Artem Mustafin 2024-05-28 17:59:06 +00:00
parent 18c586be87
commit ffa0f88292
4 changed files with 449 additions and 469 deletions

View File

@ -1,27 +1,237 @@
#include <Functions/hilbertDecode.h>
#include <Common/BitHelpers.h>
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionSpaceFillingCurve.h>
#include <Functions/PerformanceAdaptors.h>
#include <limits>
namespace DB
{
// Lookup tables used to decode a 2D Hilbert curve index, bit_step coordinate bits at a time.
//
// A table is indexed with (state | hilbert_bits): the 2-bit curve state sits directly above
// the 2 * bit_step bits taken from the Hilbert index. Every entry packs the next state in the
// same two top bits, followed by bit_step bits of x and then bit_step bits of y.
namespace HilbertDetails
{
// Primary template: used for any bit_step that has no specialization below.
// NOTE(review): a zero-length array is not standard C++ and relies on a compiler extension.
template <UInt8 bit_step>
class HilbertDecodeLookupTable
{
public:
constexpr static UInt8 LOOKUP_TABLE[0] = {};
};
// bit_step = 1: index is SSHH (4 states x 4 codes = 16 entries), entry is SSXY.
template <>
class HilbertDecodeLookupTable<1>
{
public:
constexpr static UInt8 LOOKUP_TABLE[16] = {
4, 1, 3, 10,
0, 6, 7, 13,
15, 9, 8, 2,
11, 14, 12, 5
};
};
// bit_step = 2: index is SSHHHH (4 states x 16 codes = 64 entries), entry is SSXXYY.
template <>
class HilbertDecodeLookupTable<2>
{
public:
constexpr static UInt8 LOOKUP_TABLE[64] = {
0, 20, 21, 49, 18, 3, 7, 38,
26, 11, 15, 46, 61, 41, 40, 12,
16, 1, 5, 36, 8, 28, 29, 57,
10, 30, 31, 59, 39, 54, 50, 19,
47, 62, 58, 27, 55, 35, 34, 6,
53, 33, 32, 4, 24, 9, 13, 44,
63, 43, 42, 14, 45, 60, 56, 25,
37, 52, 48, 17, 2, 22, 23, 51
};
};
// bit_step = 3: index is SSHHHHHH (4 states x 64 codes = 256 entries), entry is SSXXXYYY.
template <>
class HilbertDecodeLookupTable<3>
{
public:
constexpr static UInt8 LOOKUP_TABLE[256] = {
64, 1, 9, 136, 16, 88, 89, 209, 18, 90, 91, 211, 139, 202, 194, 67,
4, 76, 77, 197, 70, 7, 15, 142, 86, 23, 31, 158, 221, 149, 148, 28,
36, 108, 109, 229, 102, 39, 47, 174, 118, 55, 63, 190, 253, 181, 180, 60,
187, 250, 242, 115, 235, 163, 162, 42, 233, 161, 160, 40, 112, 49, 57, 184,
0, 72, 73, 193, 66, 3, 11, 138, 82, 19, 27, 154, 217, 145, 144, 24,
96, 33, 41, 168, 48, 120, 121, 241, 50, 122, 123, 243, 171, 234, 226, 99,
100, 37, 45, 172, 52, 124, 125, 245, 54, 126, 127, 247, 175, 238, 230, 103,
223, 151, 150, 30, 157, 220, 212, 85, 141, 204, 196, 69, 6, 78, 79, 199,
255, 183, 182, 62, 189, 252, 244, 117, 173, 236, 228, 101, 38, 110, 111, 231,
159, 222, 214, 87, 207, 135, 134, 14, 205, 133, 132, 12, 84, 21, 29, 156,
155, 218, 210, 83, 203, 131, 130, 10, 201, 129, 128, 8, 80, 17, 25, 152,
32, 104, 105, 225, 98, 35, 43, 170, 114, 51, 59, 186, 249, 177, 176, 56,
191, 254, 246, 119, 239, 167, 166, 46, 237, 165, 164, 44, 116, 53, 61, 188,
251, 179, 178, 58, 185, 248, 240, 113, 169, 232, 224, 97, 34, 106, 107, 227,
219, 147, 146, 26, 153, 216, 208, 81, 137, 200, 192, 65, 2, 74, 75, 195,
68, 5, 13, 140, 20, 92, 93, 213, 22, 94, 95, 215, 143, 206, 198, 71
};
};
}
// Decodes a 2D Hilbert curve index into its (x, y) coordinates with a lookup table.
// Every step consumes 2 * bit_step bits of the index (most significant group first)
// and produces bit_step bits of x and bit_step bits of y.
template <UInt8 bit_step>
class FunctionHilbertDecode2DWIthLookupTableImpl
{
    // bit_step == 0 would make HILBERT_SHIFT zero: division by zero below and no table to index.
    static_assert(bit_step >= 1, "bit_step should be at least 1");
    static_assert(bit_step <= 3, "bit_step should not be more than 3 to fit in UInt8");

public:
    // Returns {x, y} for the given Hilbert index. An index of 0 decodes to {0, 0}.
    static std::tuple<UInt64, UInt64> decode(UInt64 hilbert_code)
    {
        UInt64 x = 0;
        UInt64 y = 0;
        const auto leading_zeros_count = getLeadingZeroBits(hilbert_code);
        const auto used_bits = std::numeric_limits<UInt64>::digits - leading_zeros_count;

        auto [current_shift, state] = getInitialShiftAndState(used_bits);
        while (current_shift >= 0)
        {
            const UInt8 hilbert_bits = (hilbert_code >> current_shift) & HILBERT_MASK;
            const auto [x_bits, y_bits] = getCodeAndUpdateState(hilbert_bits, state);
            // The index is twice as wide as a coordinate, so a group at index offset s
            // lands at coordinate offset s / 2.
            x |= (x_bits << (current_shift >> 1));
            y |= (y_bits << (current_shift >> 1));
            current_shift -= HILBERT_SHIFT;
        }

        return {x, y};
    }

private:
    // for bit_step = 3
    // LOOKUP_TABLE[SSHHHHHH] = SSXXXYYY
    // where SS - 2 bits for state, XXX - 3 bits of x, YYY - 3 bits of y
    // State is rotation of curve on every step, left/up/right/down - therefore 2 bits
    //
    // Looks up one group of index bits, stores the next state into `state`
    // and returns the decoded {x_bits, y_bits}.
    static std::pair<UInt64, UInt64> getCodeAndUpdateState(UInt8 hilbert_bits, UInt8& state)
    {
        const UInt8 table_index = state | hilbert_bits;
        const auto table_code = HilbertDetails::HilbertDecodeLookupTable<bit_step>::LOOKUP_TABLE[table_index];
        state = table_code & STATE_MASK;
        const UInt64 x_bits = (table_code & X_MASK) >> bit_step;
        const UInt64 y_bits = table_code & Y_MASK;
        return {x_bits, y_bits};
    }

    // hilbert code is double size of input values
    static constexpr UInt8 getHilbertShift(UInt8 shift)
    {
        return shift << 1;
    }

    // Returns the bit offset of the most significant group of HILBERT_SHIFT bits that is in use
    // and the state to start decoding with. The offset is negative when used_bits is 0,
    // which makes the decode loop a no-op. The start state depends on the parity of the
    // number of groups that will be processed.
    static std::pair<Int8, UInt8> getInitialShiftAndState(UInt8 used_bits)
    {
        UInt8 iterations = used_bits / HILBERT_SHIFT;
        Int8 initial_shift = iterations * HILBERT_SHIFT;
        if (initial_shift < used_bits)
        {
            // A partially filled top group still needs its own step.
            ++iterations;
        }
        else
        {
            // used_bits is a multiple of the group size: the top group starts one group lower.
            initial_shift -= HILBERT_SHIFT;
        }
        UInt8 state = iterations % 2 == 0 ? LEFT_STATE : DEFAULT_STATE;
        return {initial_shift, state};
    }

    constexpr static UInt8 STEP_MASK = (1 << bit_step) - 1;
    constexpr static UInt8 HILBERT_SHIFT = getHilbertShift(bit_step);
    constexpr static UInt8 HILBERT_MASK = (1 << HILBERT_SHIFT) - 1;
    constexpr static UInt8 STATE_MASK = 0b11 << HILBERT_SHIFT;
    constexpr static UInt8 Y_MASK = STEP_MASK;
    constexpr static UInt8 X_MASK = STEP_MASK << bit_step;
    constexpr static UInt8 LEFT_STATE = 0b01 << HILBERT_SHIFT;
    constexpr static UInt8 DEFAULT_STATE = bit_step % 2 == 0 ? LEFT_STATE : 0;
};
namespace ErrorCodes
{
    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
}

// SQL function hilbertDecode(tuple_size_or_mask, code).
// The first argument is a constant: either the number of coordinates to produce
// or a tuple of right-shift amounts (one per coordinate). The second argument is the Hilbert index.
// Returns a tuple of UInt64 columns with the decoded coordinates.
class FunctionHilbertDecode : public FunctionSpaceFillingCurveDecode<2, 0, 32>
{
public:
    static constexpr auto name = "hilbertDecode";

    static FunctionPtr create(ContextPtr)
    {
        return std::make_shared<FunctionHilbertDecode>();
    }

    String getName() const override { return name; }

    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
    {
        // typeid_cast to a pointer yields nullptr on mismatch, so it must be checked before use.
        const auto * col_const = typeid_cast<const ColumnConst *>(arguments[0].column.get());
        if (!col_const)
            throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
                "Illegal column {} of first argument of function {}, should be a constant",
                arguments[0].column->getName(), getName());

        const auto * mask = typeid_cast<const ColumnTuple *>(col_const->getDataColumnPtr().get());
        const size_t num_dimensions = mask ? mask->tupleSize() : col_const->getUInt(0);
        if (num_dimensions != 1 && num_dimensions != 2)
            throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
                "Illegal number of dimensions of function {}: should be 1 or 2",
                getName());

        // Right shift applied to every decoded coordinate; zero when no mask tuple was given.
        // Read once here instead of once per row.
        UInt64 shifts[2] = {0, 0};
        if (mask)
        {
            for (size_t dim = 0; dim < num_dimensions; ++dim)
                shifts[dim] = mask->getColumn(dim).getUInt(0);
        }

        const ColumnPtr col_code = arguments[1].column->convertToFullColumnIfConst();
        Columns tuple_columns(num_dimensions);

        auto col0 = ColumnUInt64::create();
        auto & vec0 = col0->getData();
        vec0.resize(input_rows_count);

        if (num_dimensions == 1)
        {
            // One dimension: the code is the coordinate itself.
            for (size_t i = 0; i < input_rows_count; ++i)
                vec0[i] = col_code->getUInt(i) >> shifts[0];

            tuple_columns[0] = std::move(col0);
            return ColumnTuple::create(tuple_columns);
        }

        auto col1 = ColumnUInt64::create();
        auto & vec1 = col1->getData();
        vec1.resize(input_rows_count);

        for (size_t i = 0; i < input_rows_count; ++i)
        {
            const auto [x, y] = FunctionHilbertDecode2DWIthLookupTableImpl<3>::decode(col_code->getUInt(i));
            vec0[i] = x >> shifts[0];
            vec1[i] = y >> shifts[1];
        }

        tuple_columns[0] = std::move(col0);
        tuple_columns[1] = std::move(col1);
        return ColumnTuple::create(tuple_columns);
    }
};
REGISTER_FUNCTION(HilbertDecode)
{
factory.registerFunction<FunctionHilbertDecode>(FunctionDocumentation{
.description=R"(
Decodes Hilbert Curve code into the corresponding unsigned integer tuple
Decodes a Hilbert curve index back into a tuple of unsigned integers, representing coordinates in multi-dimensional space.
The function has two modes of operation:
- Simple
- Expanded
Simple: accepts a resulting tuple size as a first argument and the code as a second argument.
Simple Mode: Accepts the desired tuple size as the first argument (up to 2) and the Hilbert index as the second argument. This mode decodes the index into a tuple of the specified size.
[example:simple]
Will decode into: `(8, 0)`
The resulting tuple size cannot be more than 2
Expanded: accepts a range mask (tuple) as a first argument and the code as a second argument.
Each number in mask configures the amount of bits that corresponding argument will be shifted right
Expanded Mode: Takes a range mask (tuple) as the first argument and the Hilbert index as the second argument.
Each number in the mask specifies the number of bits by which the corresponding decoded argument will be right-shifted, effectively scaling down the output values.
[example:range_shrank]
Note: see hilbertEncode() docs on why range change might be beneficial.
Still limited to 2 numbers at most.
@ -30,7 +240,7 @@ Hilbert code for one argument is always the argument itself (as a tuple).
[example:identity]
Produces: `(1)`
You can shrink one argument too:
A single argument with a tuple specifying bit shifts will be right-shifted accordingly.
[example:identity_shrank]
Produces: `(128)`

View File

@ -1,221 +0,0 @@
#pragma once
#include <Columns/ColumnConst.h>
#include <Columns/ColumnTuple.h>
#include <Columns/ColumnsNumber.h>
#include <Common/BitHelpers.h>
#include <Functions/FunctionSpaceFillingCurve.h>
#include <Functions/IFunction.h>
#include <Functions/PerformanceAdaptors.h>
#include <limits>
namespace DB
{
// Lookup tables used to decode a 2D Hilbert curve index, bit_step coordinate bits at a time.
//
// A table is indexed with (state | hilbert_bits): the 2-bit curve state sits directly above
// the 2 * bit_step bits taken from the Hilbert index. Every entry packs the next state in the
// same two top bits, followed by bit_step bits of x and then bit_step bits of y.
namespace HilbertDetails
{
// Primary template: used for any bit_step that has no specialization below.
// NOTE(review): a zero-length array is not standard C++ and relies on a compiler extension.
template <UInt8 bit_step>
class HilbertDecodeLookupTable
{
public:
constexpr static UInt8 LOOKUP_TABLE[0] = {};
};
// bit_step = 1: index is SSHH (4 states x 4 codes = 16 entries), entry is SSXY.
template <>
class HilbertDecodeLookupTable<1>
{
public:
constexpr static UInt8 LOOKUP_TABLE[16] = {
4, 1, 3, 10,
0, 6, 7, 13,
15, 9, 8, 2,
11, 14, 12, 5
};
};
// bit_step = 2: index is SSHHHH (4 states x 16 codes = 64 entries), entry is SSXXYY.
template <>
class HilbertDecodeLookupTable<2>
{
public:
constexpr static UInt8 LOOKUP_TABLE[64] = {
0, 20, 21, 49, 18, 3, 7, 38,
26, 11, 15, 46, 61, 41, 40, 12,
16, 1, 5, 36, 8, 28, 29, 57,
10, 30, 31, 59, 39, 54, 50, 19,
47, 62, 58, 27, 55, 35, 34, 6,
53, 33, 32, 4, 24, 9, 13, 44,
63, 43, 42, 14, 45, 60, 56, 25,
37, 52, 48, 17, 2, 22, 23, 51
};
};
// bit_step = 3: index is SSHHHHHH (4 states x 64 codes = 256 entries), entry is SSXXXYYY.
template <>
class HilbertDecodeLookupTable<3>
{
public:
constexpr static UInt8 LOOKUP_TABLE[256] = {
64, 1, 9, 136, 16, 88, 89, 209, 18, 90, 91, 211, 139, 202, 194, 67,
4, 76, 77, 197, 70, 7, 15, 142, 86, 23, 31, 158, 221, 149, 148, 28,
36, 108, 109, 229, 102, 39, 47, 174, 118, 55, 63, 190, 253, 181, 180, 60,
187, 250, 242, 115, 235, 163, 162, 42, 233, 161, 160, 40, 112, 49, 57, 184,
0, 72, 73, 193, 66, 3, 11, 138, 82, 19, 27, 154, 217, 145, 144, 24,
96, 33, 41, 168, 48, 120, 121, 241, 50, 122, 123, 243, 171, 234, 226, 99,
100, 37, 45, 172, 52, 124, 125, 245, 54, 126, 127, 247, 175, 238, 230, 103,
223, 151, 150, 30, 157, 220, 212, 85, 141, 204, 196, 69, 6, 78, 79, 199,
255, 183, 182, 62, 189, 252, 244, 117, 173, 236, 228, 101, 38, 110, 111, 231,
159, 222, 214, 87, 207, 135, 134, 14, 205, 133, 132, 12, 84, 21, 29, 156,
155, 218, 210, 83, 203, 131, 130, 10, 201, 129, 128, 8, 80, 17, 25, 152,
32, 104, 105, 225, 98, 35, 43, 170, 114, 51, 59, 186, 249, 177, 176, 56,
191, 254, 246, 119, 239, 167, 166, 46, 237, 165, 164, 44, 116, 53, 61, 188,
251, 179, 178, 58, 185, 248, 240, 113, 169, 232, 224, 97, 34, 106, 107, 227,
219, 147, 146, 26, 153, 216, 208, 81, 137, 200, 192, 65, 2, 74, 75, 195,
68, 5, 13, 140, 20, 92, 93, 213, 22, 94, 95, 215, 143, 206, 198, 71
};
};
}
// Decodes a 2D Hilbert curve index into its (x, y) coordinates with a lookup table.
// Every step consumes 2 * bit_step bits of the index (most significant group first)
// and produces bit_step bits of x and bit_step bits of y.
template <UInt8 bit_step>
class FunctionHilbertDecode2DWIthLookupTableImpl
{
    // bit_step == 0 would make HILBERT_SHIFT zero: division by zero below and no table to index.
    static_assert(bit_step >= 1, "bit_step should be at least 1");
    static_assert(bit_step <= 3, "bit_step should not be more than 3 to fit in UInt8");

public:
    // Returns {x, y} for the given Hilbert index. An index of 0 decodes to {0, 0}.
    static std::tuple<UInt64, UInt64> decode(UInt64 hilbert_code)
    {
        UInt64 x = 0;
        UInt64 y = 0;
        const auto leading_zeros_count = getLeadingZeroBits(hilbert_code);
        const auto used_bits = std::numeric_limits<UInt64>::digits - leading_zeros_count;

        auto [current_shift, state] = getInitialShiftAndState(used_bits);
        while (current_shift >= 0)
        {
            const UInt8 hilbert_bits = (hilbert_code >> current_shift) & HILBERT_MASK;
            const auto [x_bits, y_bits] = getCodeAndUpdateState(hilbert_bits, state);
            // The index is twice as wide as a coordinate, so a group at index offset s
            // lands at coordinate offset s / 2.
            x |= (x_bits << (current_shift >> 1));
            y |= (y_bits << (current_shift >> 1));
            current_shift -= HILBERT_SHIFT;
        }

        return {x, y};
    }

private:
    // for bit_step = 3
    // LOOKUP_TABLE[SSHHHHHH] = SSXXXYYY
    // where SS - 2 bits for state, XXX - 3 bits of x, YYY - 3 bits of y
    // State is rotation of curve on every step, left/up/right/down - therefore 2 bits
    //
    // Looks up one group of index bits, stores the next state into `state`
    // and returns the decoded {x_bits, y_bits}.
    static std::pair<UInt64, UInt64> getCodeAndUpdateState(UInt8 hilbert_bits, UInt8& state)
    {
        const UInt8 table_index = state | hilbert_bits;
        const auto table_code = HilbertDetails::HilbertDecodeLookupTable<bit_step>::LOOKUP_TABLE[table_index];
        state = table_code & STATE_MASK;
        const UInt64 x_bits = (table_code & X_MASK) >> bit_step;
        const UInt64 y_bits = table_code & Y_MASK;
        return {x_bits, y_bits};
    }

    // hilbert code is double size of input values
    static constexpr UInt8 getHilbertShift(UInt8 shift)
    {
        return shift << 1;
    }

    // Returns the bit offset of the most significant group of HILBERT_SHIFT bits that is in use
    // and the state to start decoding with. The offset is negative when used_bits is 0,
    // which makes the decode loop a no-op. The start state depends on the parity of the
    // number of groups that will be processed.
    static std::pair<Int8, UInt8> getInitialShiftAndState(UInt8 used_bits)
    {
        UInt8 iterations = used_bits / HILBERT_SHIFT;
        Int8 initial_shift = iterations * HILBERT_SHIFT;
        if (initial_shift < used_bits)
        {
            // A partially filled top group still needs its own step.
            ++iterations;
        }
        else
        {
            // used_bits is a multiple of the group size: the top group starts one group lower.
            initial_shift -= HILBERT_SHIFT;
        }
        UInt8 state = iterations % 2 == 0 ? LEFT_STATE : DEFAULT_STATE;
        return {initial_shift, state};
    }

    constexpr static UInt8 STEP_MASK = (1 << bit_step) - 1;
    constexpr static UInt8 HILBERT_SHIFT = getHilbertShift(bit_step);
    constexpr static UInt8 HILBERT_MASK = (1 << HILBERT_SHIFT) - 1;
    constexpr static UInt8 STATE_MASK = 0b11 << HILBERT_SHIFT;
    constexpr static UInt8 Y_MASK = STEP_MASK;
    constexpr static UInt8 X_MASK = STEP_MASK << bit_step;
    constexpr static UInt8 LEFT_STATE = 0b01 << HILBERT_SHIFT;
    constexpr static UInt8 DEFAULT_STATE = bit_step % 2 == 0 ? LEFT_STATE : 0;
};
namespace ErrorCodes
{
    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
}

// SQL function hilbertDecode(tuple_size_or_mask, code).
// The first argument is a constant: either the number of coordinates to produce
// or a tuple of right-shift amounts (one per coordinate). The second argument is the Hilbert index.
// Returns a tuple of UInt64 columns with the decoded coordinates.
class FunctionHilbertDecode : public FunctionSpaceFillingCurveDecode<2, 0, 32>
{
public:
    static constexpr auto name = "hilbertDecode";

    static FunctionPtr create(ContextPtr)
    {
        return std::make_shared<FunctionHilbertDecode>();
    }

    String getName() const override { return name; }

    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
    {
        // typeid_cast to a pointer yields nullptr on mismatch, so it must be checked before use.
        const auto * col_const = typeid_cast<const ColumnConst *>(arguments[0].column.get());
        if (!col_const)
            throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
                "Illegal column {} of first argument of function {}, should be a constant",
                arguments[0].column->getName(), getName());

        const auto * mask = typeid_cast<const ColumnTuple *>(col_const->getDataColumnPtr().get());
        const size_t nd = mask ? mask->tupleSize() : col_const->getUInt(0);
        if (nd != 1 && nd != 2)
            throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
                "Illegal number of dimensions of function {}: should be 1 or 2",
                getName());

        // Right shift applied to every decoded coordinate; zero when no mask tuple was given.
        // Read once here instead of once per row.
        UInt64 shifts[2] = {0, 0};
        if (mask)
        {
            for (size_t dim = 0; dim < nd; ++dim)
                shifts[dim] = mask->getColumn(dim).getUInt(0);
        }

        const ColumnPtr col_code = arguments[1].column->convertToFullColumnIfConst();
        Columns tuple_columns(nd);

        auto col0 = ColumnUInt64::create();
        auto & vec0 = col0->getData();
        vec0.resize(input_rows_count);

        if (nd == 1)
        {
            // One dimension: the code is the coordinate itself.
            for (size_t i = 0; i < input_rows_count; ++i)
                vec0[i] = col_code->getUInt(i) >> shifts[0];

            tuple_columns[0] = std::move(col0);
            return ColumnTuple::create(tuple_columns);
        }

        auto col1 = ColumnUInt64::create();
        auto & vec1 = col1->getData();
        vec1.resize(input_rows_count);

        for (size_t i = 0; i < input_rows_count; ++i)
        {
            const auto [x, y] = FunctionHilbertDecode2DWIthLookupTableImpl<3>::decode(col_code->getUInt(i));
            vec0[i] = x >> shifts[0];
            vec1[i] = y >> shifts[1];
        }

        tuple_columns[0] = std::move(col0);
        tuple_columns[1] = std::move(col1);
        return ColumnTuple::create(tuple_columns);
    }
};
}

View File

@ -1,15 +1,238 @@
#include <Functions/hilbertEncode.h>
#include <Common/BitHelpers.h>
#include <Functions/FunctionSpaceFillingCurve.h>
#include <Functions/PerformanceAdaptors.h>
#include <limits>
#include <optional>
#include <Functions/FunctionFactory.h>
namespace DB
{
// Error codes thrown by hilbertEncode; only declared here (extern, no initializer).
namespace ErrorCodes
{
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int ARGUMENT_OUT_OF_BOUND;
}
// Lookup tables used to encode a 2D point into a Hilbert curve index, bit_step coordinate bits at a time.
//
// A table is indexed with (state | x_bits << bit_step | y_bits): the 2-bit curve state sits
// directly above bit_step bits of x and bit_step bits of y. Every entry packs the next state
// in the same two top bits, followed by 2 * bit_step bits of the Hilbert index.
namespace HilbertDetails
{
// Primary template: used for any bit_step that has no specialization below.
// NOTE(review): a zero-length array is not standard C++ and relies on a compiler extension.
template <UInt8 bit_step>
class HilbertEncodeLookupTable
{
public:
constexpr static UInt8 LOOKUP_TABLE[0] = {};
};
// bit_step = 1: index is SSXY (16 entries), entry is SSHH.
template <>
class HilbertEncodeLookupTable<1>
{
public:
constexpr static UInt8 LOOKUP_TABLE[16] = {
4, 1, 11, 2,
0, 15, 5, 6,
10, 9, 3, 12,
14, 7, 13, 8
};
};
// bit_step = 2: index is SSXXYY (64 entries), entry is SSHHHH.
template <>
class HilbertEncodeLookupTable<2>
{
public:
constexpr static UInt8 LOOKUP_TABLE[64] = {
0, 51, 20, 5, 17, 18, 39, 6,
46, 45, 24, 9, 15, 60, 43, 10,
16, 1, 62, 31, 35, 2, 61, 44,
4, 55, 8, 59, 21, 22, 25, 26,
42, 41, 38, 37, 11, 56, 7, 52,
28, 13, 50, 19, 47, 14, 49, 32,
58, 27, 12, 63, 57, 40, 29, 30,
54, 23, 34, 33, 53, 36, 3, 48
};
};
// bit_step = 3: index is SSXXXYYY (256 entries), entry is SSHHHHHH.
template <>
class HilbertEncodeLookupTable<3>
{
public:
constexpr static UInt8 LOOKUP_TABLE[256] = {
64, 1, 206, 79, 16, 211, 84, 21, 131, 2, 205, 140, 81, 82, 151, 22, 4,
199, 8, 203, 158, 157, 88, 25, 69, 70, 73, 74, 31, 220, 155, 26, 186,
185, 182, 181, 32, 227, 100, 37, 59, 248, 55, 244, 97, 98, 167, 38, 124,
61, 242, 115, 174, 173, 104, 41, 191, 62, 241, 176, 47, 236, 171, 42, 0,
195, 68, 5, 250, 123, 60, 255, 65, 66, 135, 6, 249, 184, 125, 126, 142,
141, 72, 9, 246, 119, 178, 177, 15, 204, 139, 10, 245, 180, 51, 240, 80,
17, 222, 95, 96, 33, 238, 111, 147, 18, 221, 156, 163, 34, 237, 172, 20,
215, 24, 219, 36, 231, 40, 235, 85, 86, 89, 90, 101, 102, 105, 106, 170,
169, 166, 165, 154, 153, 150, 149, 43, 232, 39, 228, 27, 216, 23, 212, 108,
45, 226, 99, 92, 29, 210, 83, 175, 46, 225, 160, 159, 30, 209, 144, 48,
243, 116, 53, 202, 75, 12, 207, 113, 114, 183, 54, 201, 136, 77, 78, 190,
189, 120, 57, 198, 71, 130, 129, 63, 252, 187, 58, 197, 132, 3, 192, 234,
107, 44, 239, 112, 49, 254, 127, 233, 168, 109, 110, 179, 50, 253, 188, 230,
103, 162, 161, 52, 247, 56, 251, 229, 164, 35, 224, 117, 118, 121, 122, 218,
91, 28, 223, 138, 137, 134, 133, 217, 152, 93, 94, 11, 200, 7, 196, 214,
87, 146, 145, 76, 13, 194, 67, 213, 148, 19, 208, 143, 14, 193, 128,
};
};
}
// Encodes a 2D point (x, y) into a Hilbert curve index with a lookup table.
// Every step consumes bit_step bits of each coordinate (most significant group first)
// and produces 2 * bit_step bits of the index.
template <UInt8 bit_step>
class FunctionHilbertEncode2DWIthLookupTableImpl
{
    // bit_step == 0 would mean division by zero below and a loop that never advances.
    static_assert(bit_step >= 1, "bit_step should be at least 1");
    static_assert(bit_step <= 3, "bit_step should not be more than 3 to fit in UInt8");

public:
    // Returns the Hilbert index of (x, y).
    // The index is twice as wide as a coordinate, so each coordinate may use at most
    // MAX_COORDINATE_BITS bits; wider input cannot be represented and yields 0.
    static UInt64 encode(UInt64 x, UInt64 y)
    {
        UInt64 hilbert_code = 0;
        const auto leading_zeros_count = getLeadingZeroBits(x | y);
        const auto used_bits = std::numeric_limits<UInt64>::digits - leading_zeros_count;

        // Without this guard the shift below would drop bits or exceed the width of UInt64,
        // which is undefined behaviour.
        if (used_bits > MAX_COORDINATE_BITS)
            return 0;

        auto [current_shift, state] = getInitialShiftAndState(used_bits);
        while (current_shift >= 0)
        {
            const UInt8 x_bits = (x >> current_shift) & STEP_MASK;
            const UInt8 y_bits = (y >> current_shift) & STEP_MASK;
            const auto hilbert_bits = getCodeAndUpdateState(x_bits, y_bits, state);
            hilbert_code |= (hilbert_bits << getHilbertShift(current_shift));
            current_shift -= bit_step;
        }

        return hilbert_code;
    }

private:
    // for bit_step = 3
    // LOOKUP_TABLE[SSXXXYYY] = SSHHHHHH
    // where SS - 2 bits for state, XXX - 3 bits of x, YYY - 3 bits of y
    // State is rotation of curve on every step, left/up/right/down - therefore 2 bits
    //
    // Looks up one group of coordinate bits, stores the next state into `state`
    // and returns the matching bits of the index.
    static UInt64 getCodeAndUpdateState(UInt8 x_bits, UInt8 y_bits, UInt8& state)
    {
        const UInt8 table_index = state | (x_bits << bit_step) | y_bits;
        const auto table_code = HilbertDetails::HilbertEncodeLookupTable<bit_step>::LOOKUP_TABLE[table_index];
        state = table_code & STATE_MASK;
        return table_code & HILBERT_MASK;
    }

    // hilbert code is double size of input values
    static constexpr UInt8 getHilbertShift(UInt8 shift)
    {
        return shift << 1;
    }

    // Returns the bit offset of the most significant group of bit_step coordinate bits that is
    // in use and the state to start encoding with. The offset is negative when used_bits is 0,
    // which makes the encode loop a no-op. The start state depends on the parity of the
    // number of groups that will be processed.
    static std::pair<Int8, UInt8> getInitialShiftAndState(UInt8 used_bits)
    {
        UInt8 iterations = used_bits / bit_step;
        Int8 initial_shift = iterations * bit_step;
        if (initial_shift < used_bits)
        {
            // A partially filled top group still needs its own step.
            ++iterations;
        }
        else
        {
            // used_bits is a multiple of the group size: the top group starts one group lower.
            initial_shift -= bit_step;
        }
        UInt8 state = iterations % 2 == 0 ? LEFT_STATE : DEFAULT_STATE;
        return {initial_shift, state};
    }

    constexpr static UInt8 MAX_COORDINATE_BITS = std::numeric_limits<UInt64>::digits / 2;
    constexpr static UInt8 STEP_MASK = (1 << bit_step) - 1;
    constexpr static UInt8 HILBERT_SHIFT = getHilbertShift(bit_step);
    constexpr static UInt8 HILBERT_MASK = (1 << HILBERT_SHIFT) - 1;
    constexpr static UInt8 STATE_MASK = 0b11 << HILBERT_SHIFT;
    constexpr static UInt8 LEFT_STATE = 0b01 << HILBERT_SHIFT;
    constexpr static UInt8 DEFAULT_STATE = bit_step % 2 == 0 ? LEFT_STATE : 0;
};
// SQL function hilbertEncode([mask_tuple,] x[, y]).
// Without a mask the arguments are the coordinates themselves. When the first argument is a
// tuple, its elements are left-shift amounts (0-32) applied to the corresponding coordinate
// before encoding. Returns a UInt64 column with the Hilbert index.
class FunctionHilbertEncode : public FunctionSpaceFillingCurveEncode
{
public:
    static constexpr auto name = "hilbertEncode";

    static FunctionPtr create(ContextPtr)
    {
        return std::make_shared<FunctionHilbertEncode>();
    }

    String getName() const override { return name; }

    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
    {
        // Nothing to encode. Returning early also avoids reading row 0 of a
        // non-constant mask column that has no rows.
        if (input_rows_count == 0)
            return ColumnUInt64::create();

        constexpr size_t max_dimensions = 2;

        size_t num_dimensions = arguments.size();
        size_t vector_start_index = 0;

        // The mask tuple may arrive either wrapped in a constant column or as a plain tuple column.
        const auto * const_col = typeid_cast<const ColumnConst *>(arguments[0].column.get());
        const ColumnTuple * mask = const_col
            ? typeid_cast<const ColumnTuple *>(const_col->getDataColumnPtr().get())
            : typeid_cast<const ColumnTuple *>(arguments[0].column.get());

        // Left shift per coordinate; zero when no mask was given. Read once instead of once per row.
        UInt64 shifts[max_dimensions] = {0, 0};
        if (mask)
        {
            num_dimensions = mask->tupleSize();
            vector_start_index = 1;
            for (size_t i = 0; i < num_dimensions; i++)
            {
                const auto ratio = mask->getColumn(i).getUInt(0);
                if (ratio > 32)
                    throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND,
                                    "Illegal argument {} of function {}, should be a number in range 0-32",
                                    arguments[0].column->getName(), getName());
                if (i < max_dimensions)
                    shifts[i] = ratio;
            }
        }

        auto non_const_arguments = arguments;
        for (auto & argument : non_const_arguments)
            argument.column = argument.column->convertToFullColumnIfConst();

        auto col_res = ColumnUInt64::create();
        ColumnUInt64::Container & vec_res = col_res->getData();
        vec_res.resize(input_rows_count);

        const ColumnPtr & col0 = non_const_arguments[0 + vector_start_index].column;
        if (num_dimensions == 1)
        {
            // One dimension: the (shifted) argument is its own index.
            for (size_t i = 0; i < input_rows_count; ++i)
                vec_res[i] = col0->getUInt(i) << shifts[0];
            return col_res;
        }

        if (num_dimensions == 2)
        {
            const ColumnPtr & col1 = non_const_arguments[1 + vector_start_index].column;
            for (size_t i = 0; i < input_rows_count; ++i)
            {
                vec_res[i] = FunctionHilbertEncode2DWIthLookupTableImpl<3>::encode(
                    col0->getUInt(i) << shifts[0],
                    col1->getUInt(i) << shifts[1]);
            }
            return col_res;
        }

        throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
                        "Illegal number of UInt arguments of function {}: should be not more than 2 dimensions",
                        getName());
    }
};
REGISTER_FUNCTION(HilbertEncode)
{
factory.registerFunction<FunctionHilbertEncode>(FunctionDocumentation{
.description=R"(
Calculates a Hilbert curve index for a list of unsigned integers to map multidimensional data to a one-dimensional integer space.
Calculates code for Hilbert Curve for a list of unsigned integers
The function has two modes of operation:
- Simple
@ -17,20 +240,22 @@ The function has two modes of operation:
Simple: accepts up to 2 unsigned integers as arguments and produces a UInt64 code.
[example:simple]
Produces: `31`
Expanded: accepts a range mask (tuple) as a first argument and up to 2 unsigned integers as other arguments.
Each number in mask configures the amount of bits that corresponding argument will be shifted left
Each number in the mask configures the number of bits by which the corresponding argument will be shifted left, effectively scaling the argument within its range.
[example:range_expanded]
Produces: `4031541586602`
Note: tuple size must be equal to the number of the other arguments
Range expansion can be beneficial when you need a similar distribution for arguments with wildly different ranges (or cardinality)
For example: 'IP Address' (0...FFFFFFFF) and 'Country code' (0...FF)
Hilbert encoding for one argument is always the argument itself.
For a single argument without a tuple, the function returns the argument itself as the Hilbert index, since no dimensional mapping is needed.
[example:identity]
Produces: `1`
You can expand one argument too:
If a single argument is provided with a tuple specifying bit shifts, the function shifts the argument left by the specified number of bits.
[example:identity_expanded]
Produces: `512`
@ -40,13 +265,13 @@ The function also accepts columns as arguments:
But the range tuple must still be a constant:
[example:from_table_range]
Please note that you can fit only so much bits of information into Morton code as UInt64 has.
Please note that you can fit only so much bits of information into Hilbert code as UInt64 has.
Two arguments will have a range of maximum 2^32 (64/2) each
All overflow will be clamped to zero
)",
.examples{
{"simple", "SELECT hilbertEncode(1, 2, 3)", ""},
{"range_expanded", "SELECT hilbertEncode((1,6), 1024, 16)", ""},
{"simple", "SELECT hilbertEncode(3, 4)", ""},
{"range_expanded", "SELECT hilbertEncode((10,6), 1024, 16)", ""},
{"identity", "SELECT hilbertEncode(1)", ""},
{"identity_expanded", "SELECT hilbertEncode(tuple(2), 128)", ""},
{"from_table", "SELECT hilbertEncode(n1, n2) FROM table", ""},

View File

@ -1,234 +0,0 @@
#pragma once
#include <Columns/ColumnConst.h>
#include <Columns/ColumnTuple.h>
#include <Columns/ColumnsNumber.h>
#include <Common/BitHelpers.h>
#include <Functions/FunctionSpaceFillingCurve.h>
#include <Functions/IFunction.h>
#include <Functions/PerformanceAdaptors.h>
#include <limits>
#include <optional>
namespace DB
{
// Error codes thrown by hilbertEncode; only declared here (extern, no initializer).
namespace ErrorCodes
{
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int ARGUMENT_OUT_OF_BOUND;
}
// Lookup tables used to encode a 2D point into a Hilbert curve index, bit_step coordinate bits at a time.
//
// A table is indexed with (state | x_bits << bit_step | y_bits): the 2-bit curve state sits
// directly above bit_step bits of x and bit_step bits of y. Every entry packs the next state
// in the same two top bits, followed by 2 * bit_step bits of the Hilbert index.
namespace HilbertDetails
{
// Primary template: used for any bit_step that has no specialization below.
// NOTE(review): a zero-length array is not standard C++ and relies on a compiler extension.
template <UInt8 bit_step>
class HilbertEncodeLookupTable
{
public:
constexpr static UInt8 LOOKUP_TABLE[0] = {};
};
// bit_step = 1: index is SSXY (16 entries), entry is SSHH.
template <>
class HilbertEncodeLookupTable<1>
{
public:
constexpr static UInt8 LOOKUP_TABLE[16] = {
4, 1, 11, 2,
0, 15, 5, 6,
10, 9, 3, 12,
14, 7, 13, 8
};
};
// bit_step = 2: index is SSXXYY (64 entries), entry is SSHHHH.
template <>
class HilbertEncodeLookupTable<2>
{
public:
constexpr static UInt8 LOOKUP_TABLE[64] = {
0, 51, 20, 5, 17, 18, 39, 6,
46, 45, 24, 9, 15, 60, 43, 10,
16, 1, 62, 31, 35, 2, 61, 44,
4, 55, 8, 59, 21, 22, 25, 26,
42, 41, 38, 37, 11, 56, 7, 52,
28, 13, 50, 19, 47, 14, 49, 32,
58, 27, 12, 63, 57, 40, 29, 30,
54, 23, 34, 33, 53, 36, 3, 48
};
};
// bit_step = 3: index is SSXXXYYY (256 entries), entry is SSHHHHHH.
template <>
class HilbertEncodeLookupTable<3>
{
public:
constexpr static UInt8 LOOKUP_TABLE[256] = {
64, 1, 206, 79, 16, 211, 84, 21, 131, 2, 205, 140, 81, 82, 151, 22, 4,
199, 8, 203, 158, 157, 88, 25, 69, 70, 73, 74, 31, 220, 155, 26, 186,
185, 182, 181, 32, 227, 100, 37, 59, 248, 55, 244, 97, 98, 167, 38, 124,
61, 242, 115, 174, 173, 104, 41, 191, 62, 241, 176, 47, 236, 171, 42, 0,
195, 68, 5, 250, 123, 60, 255, 65, 66, 135, 6, 249, 184, 125, 126, 142,
141, 72, 9, 246, 119, 178, 177, 15, 204, 139, 10, 245, 180, 51, 240, 80,
17, 222, 95, 96, 33, 238, 111, 147, 18, 221, 156, 163, 34, 237, 172, 20,
215, 24, 219, 36, 231, 40, 235, 85, 86, 89, 90, 101, 102, 105, 106, 170,
169, 166, 165, 154, 153, 150, 149, 43, 232, 39, 228, 27, 216, 23, 212, 108,
45, 226, 99, 92, 29, 210, 83, 175, 46, 225, 160, 159, 30, 209, 144, 48,
243, 116, 53, 202, 75, 12, 207, 113, 114, 183, 54, 201, 136, 77, 78, 190,
189, 120, 57, 198, 71, 130, 129, 63, 252, 187, 58, 197, 132, 3, 192, 234,
107, 44, 239, 112, 49, 254, 127, 233, 168, 109, 110, 179, 50, 253, 188, 230,
103, 162, 161, 52, 247, 56, 251, 229, 164, 35, 224, 117, 118, 121, 122, 218,
91, 28, 223, 138, 137, 134, 133, 217, 152, 93, 94, 11, 200, 7, 196, 214,
87, 146, 145, 76, 13, 194, 67, 213, 148, 19, 208, 143, 14, 193, 128,
};
};
}
// Encodes a 2D point (x, y) into a Hilbert curve index with a lookup table.
// Every step consumes bit_step bits of each coordinate (most significant group first)
// and produces 2 * bit_step bits of the index.
template <UInt8 bit_step>
class FunctionHilbertEncode2DWIthLookupTableImpl
{
    // bit_step == 0 would mean division by zero below and a loop that never advances.
    static_assert(bit_step >= 1, "bit_step should be at least 1");
    static_assert(bit_step <= 3, "bit_step should not be more than 3 to fit in UInt8");

public:
    // Returns the Hilbert index of (x, y).
    // The index is twice as wide as a coordinate, so each coordinate may use at most
    // MAX_COORDINATE_BITS bits; wider input cannot be represented and yields 0.
    static UInt64 encode(UInt64 x, UInt64 y)
    {
        UInt64 hilbert_code = 0;
        const auto leading_zeros_count = getLeadingZeroBits(x | y);
        const auto used_bits = std::numeric_limits<UInt64>::digits - leading_zeros_count;

        // Without this guard the shift below would drop bits or exceed the width of UInt64,
        // which is undefined behaviour.
        if (used_bits > MAX_COORDINATE_BITS)
            return 0;

        auto [current_shift, state] = getInitialShiftAndState(used_bits);
        while (current_shift >= 0)
        {
            const UInt8 x_bits = (x >> current_shift) & STEP_MASK;
            const UInt8 y_bits = (y >> current_shift) & STEP_MASK;
            const auto hilbert_bits = getCodeAndUpdateState(x_bits, y_bits, state);
            hilbert_code |= (hilbert_bits << getHilbertShift(current_shift));
            current_shift -= bit_step;
        }

        return hilbert_code;
    }

private:
    // for bit_step = 3
    // LOOKUP_TABLE[SSXXXYYY] = SSHHHHHH
    // where SS - 2 bits for state, XXX - 3 bits of x, YYY - 3 bits of y
    // State is rotation of curve on every step, left/up/right/down - therefore 2 bits
    //
    // Looks up one group of coordinate bits, stores the next state into `state`
    // and returns the matching bits of the index.
    static UInt64 getCodeAndUpdateState(UInt8 x_bits, UInt8 y_bits, UInt8& state)
    {
        const UInt8 table_index = state | (x_bits << bit_step) | y_bits;
        const auto table_code = HilbertDetails::HilbertEncodeLookupTable<bit_step>::LOOKUP_TABLE[table_index];
        state = table_code & STATE_MASK;
        return table_code & HILBERT_MASK;
    }

    // hilbert code is double size of input values
    static constexpr UInt8 getHilbertShift(UInt8 shift)
    {
        return shift << 1;
    }

    // Returns the bit offset of the most significant group of bit_step coordinate bits that is
    // in use and the state to start encoding with. The offset is negative when used_bits is 0,
    // which makes the encode loop a no-op. The start state depends on the parity of the
    // number of groups that will be processed.
    static std::pair<Int8, UInt8> getInitialShiftAndState(UInt8 used_bits)
    {
        UInt8 iterations = used_bits / bit_step;
        Int8 initial_shift = iterations * bit_step;
        if (initial_shift < used_bits)
        {
            // A partially filled top group still needs its own step.
            ++iterations;
        }
        else
        {
            // used_bits is a multiple of the group size: the top group starts one group lower.
            initial_shift -= bit_step;
        }
        UInt8 state = iterations % 2 == 0 ? LEFT_STATE : DEFAULT_STATE;
        return {initial_shift, state};
    }

    constexpr static UInt8 MAX_COORDINATE_BITS = std::numeric_limits<UInt64>::digits / 2;
    constexpr static UInt8 STEP_MASK = (1 << bit_step) - 1;
    constexpr static UInt8 HILBERT_SHIFT = getHilbertShift(bit_step);
    constexpr static UInt8 HILBERT_MASK = (1 << HILBERT_SHIFT) - 1;
    constexpr static UInt8 STATE_MASK = 0b11 << HILBERT_SHIFT;
    constexpr static UInt8 LEFT_STATE = 0b01 << HILBERT_SHIFT;
    constexpr static UInt8 DEFAULT_STATE = bit_step % 2 == 0 ? LEFT_STATE : 0;
};
// SQL function hilbertEncode([mask_tuple,] x[, y]).
// Without a mask the arguments are the coordinates themselves. When the first argument is a
// tuple, its elements are left-shift amounts (0-32) applied to the corresponding coordinate
// before encoding. Returns a UInt64 column with the Hilbert index.
class FunctionHilbertEncode : public FunctionSpaceFillingCurveEncode
{
public:
    static constexpr auto name = "hilbertEncode";

    static FunctionPtr create(ContextPtr)
    {
        return std::make_shared<FunctionHilbertEncode>();
    }

    String getName() const override { return name; }

    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
    {
        // Nothing to encode. Returning early also avoids reading row 0 of a
        // non-constant mask column that has no rows.
        if (input_rows_count == 0)
            return ColumnUInt64::create();

        constexpr size_t max_dimensions = 2;

        size_t num_dimensions = arguments.size();
        size_t vector_start_index = 0;

        // The mask tuple may arrive either wrapped in a constant column or as a plain tuple column.
        const auto * const_col = typeid_cast<const ColumnConst *>(arguments[0].column.get());
        const ColumnTuple * mask = const_col
            ? typeid_cast<const ColumnTuple *>(const_col->getDataColumnPtr().get())
            : typeid_cast<const ColumnTuple *>(arguments[0].column.get());

        // Left shift per coordinate; zero when no mask was given. Read once instead of once per row.
        UInt64 shifts[max_dimensions] = {0, 0};
        if (mask)
        {
            num_dimensions = mask->tupleSize();
            vector_start_index = 1;
            for (size_t i = 0; i < num_dimensions; i++)
            {
                const auto ratio = mask->getColumn(i).getUInt(0);
                if (ratio > 32)
                    throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND,
                                    "Illegal argument {} of function {}, should be a number in range 0-32",
                                    arguments[0].column->getName(), getName());
                if (i < max_dimensions)
                    shifts[i] = ratio;
            }
        }

        auto non_const_arguments = arguments;
        for (auto & argument : non_const_arguments)
            argument.column = argument.column->convertToFullColumnIfConst();

        auto col_res = ColumnUInt64::create();
        ColumnUInt64::Container & vec_res = col_res->getData();
        vec_res.resize(input_rows_count);

        const ColumnPtr & col0 = non_const_arguments[0 + vector_start_index].column;
        if (num_dimensions == 1)
        {
            // One dimension: the (shifted) argument is its own index.
            for (size_t i = 0; i < input_rows_count; ++i)
                vec_res[i] = col0->getUInt(i) << shifts[0];
            return col_res;
        }

        if (num_dimensions == 2)
        {
            const ColumnPtr & col1 = non_const_arguments[1 + vector_start_index].column;
            for (size_t i = 0; i < input_rows_count; ++i)
            {
                vec_res[i] = FunctionHilbertEncode2DWIthLookupTableImpl<3>::encode(
                    col0->getUInt(i) << shifts[0],
                    col1->getUInt(i) << shifts[1]);
            }
            return col_res;
        }

        throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
                        "Illegal number of UInt arguments of function {}: should be not more than 2 dimensions",
                        getName());
    }
};
}