Cosmetics, pt. VI

This commit is contained in:
Robert Schulze 2024-05-23 15:53:14 +00:00
parent e6f135089f
commit 4611a44c1f
No known key found for this signature in database
GPG Key ID: 26703B55FB13728A
2 changed files with 60 additions and 65 deletions

View File

@ -27,7 +27,7 @@ namespace
- The first 41 (+ 1 top zero bit) bits is the timestamp (millisecond since Unix epoch 1 Jan 1970)
- The middle 10 bits are the machine ID
- The last 12 bits are a counter to disambiguate multiple snowflakeIDs generated within the same millisecond by differen processes
- The last 12 bits are a counter to disambiguate multiple snowflakeIDs generated within the same millisecond by different processes
*/
/// bit counts
@ -36,14 +36,13 @@ constexpr auto machine_id_bits_count = 10;
constexpr auto machine_seq_num_bits_count = 12;
/// bits masks for Snowflake ID components
// constexpr uint64_t timestamp_mask = ((1ULL << timestamp_bits_count) - 1) << (machine_id_bits_count + machine_seq_num_bits_count); // unused
constexpr uint64_t machine_id_mask = ((1ULL << machine_id_bits_count) - 1) << machine_seq_num_bits_count;
constexpr uint64_t machine_seq_num_mask = (1ULL << machine_seq_num_bits_count) - 1;
constexpr uint64_t machine_id_mask = ((1ull << machine_id_bits_count) - 1) << machine_seq_num_bits_count;
constexpr uint64_t machine_seq_num_mask = (1ull << machine_seq_num_bits_count) - 1;
/// max values
constexpr uint64_t max_machine_seq_num = machine_seq_num_mask;
uint64_t getMachineID()
uint64_t getMachineId()
{
UUID server_uuid = ServerUUID::get();
/// hash into 64 bits
@ -57,48 +56,44 @@ uint64_t getTimestamp()
{
auto now = std::chrono::system_clock::now();
auto ticks_since_epoch = std::chrono::duration_cast<std::chrono::milliseconds>(now.time_since_epoch()).count();
return static_cast<uint64_t>(ticks_since_epoch) & ((1ULL << timestamp_bits_count) - 1);
return static_cast<uint64_t>(ticks_since_epoch) & ((1ull << timestamp_bits_count) - 1);
}
struct SnowflakeComponents {
struct SnowflakeId
{
uint64_t timestamp;
uint64_t machind_id;
uint64_t machine_seq_num;
};
SnowflakeComponents toComponents(uint64_t snowflake) {
return {
.timestamp = (snowflake >> (machine_id_bits_count + machine_seq_num_bits_count)),
.machind_id = ((snowflake & machine_id_mask) >> machine_seq_num_bits_count),
.machine_seq_num = (snowflake & machine_seq_num_mask)
};
SnowflakeId toSnowflakeId(uint64_t snowflake)
{
return {.timestamp = (snowflake >> (machine_id_bits_count + machine_seq_num_bits_count)),
.machind_id = ((snowflake & machine_id_mask) >> machine_seq_num_bits_count),
.machine_seq_num = (snowflake & machine_seq_num_mask)};
}
uint64_t toSnowflakeID(SnowflakeComponents components) {
uint64_t fromSnowflakeId(SnowflakeId components)
{
return (components.timestamp << (machine_id_bits_count + machine_seq_num_bits_count) |
components.machind_id << (machine_seq_num_bits_count) |
components.machine_seq_num);
}
struct RangeOfSnowflakeIDs {
/// [begin, end)
SnowflakeComponents begin, end;
struct SnowflakeIdRange
{
SnowflakeId begin; /// inclusive
SnowflakeId end; /// exclusive
};
/* Get range of `input_rows_count` Snowflake IDs from `max(available, now)`
1. Calculate Snowflake ID by current timestamp (`now`)
2. `begin = max(available, now)`
3. Calculate `end = begin + input_rows_count` handling `machine_seq_num` overflow
*/
RangeOfSnowflakeIDs getRangeOfAvailableIDs(const SnowflakeComponents& available, size_t input_rows_count)
/// To get the range of `input_rows_count` Snowflake IDs from `max(available, now)`:
/// 1. calculate Snowflake ID by current timestamp (`now`)
/// 2. `begin = max(available, now)`
/// 3. Calculate `end = begin + input_rows_count` handling `machine_seq_num` overflow
SnowflakeIdRange getRangeOfAvailableIds(const SnowflakeId & available, size_t input_rows_count)
{
/// 1. `now`
SnowflakeComponents begin = {
.timestamp = getTimestamp(),
.machind_id = getMachineID(),
.machine_seq_num = 0
};
SnowflakeId begin = {.timestamp = getTimestamp(), .machind_id = getMachineId(), .machine_seq_num = 0};
/// 2. `begin`
if (begin.timestamp <= available.timestamp)
@ -108,7 +103,7 @@ RangeOfSnowflakeIDs getRangeOfAvailableIDs(const SnowflakeComponents& available,
}
/// 3. `end = begin + input_rows_count`
SnowflakeComponents end;
SnowflakeId end;
const uint64_t seq_nums_in_current_timestamp_left = (max_machine_seq_num - begin.machine_seq_num + 1);
if (input_rows_count >= seq_nums_in_current_timestamp_left)
/// if sequence numbers in current timestamp is not enough for rows => update timestamp
@ -125,22 +120,22 @@ RangeOfSnowflakeIDs getRangeOfAvailableIDs(const SnowflakeComponents& available,
struct GlobalCounterPolicy
{
static constexpr auto name = "generateSnowflakeID";
static constexpr auto doc_description = R"(Generates a Snowflake ID. The generated Snowflake ID contains the current Unix timestamp in milliseconds 41 (+ 1 top zero bit) bits, followed by machine id (10 bits), a counter (12 bits) to distinguish IDs within a millisecond. For any given timestamp (unix_ts_ms), the counter starts at 0 and is incremented by 1 for each new Snowflake ID until the timestamp changes. In case the counter overflows, the timestamp field is incremented by 1 and the counter is reset to 0. Function generateSnowflakeID guarantees that the counter field within a timestamp increments monotonically across all function invocations in concurrently running threads and queries.)";
static constexpr auto description = R"(Generates a Snowflake ID. The generated Snowflake ID contains the current Unix timestamp in milliseconds 41 (+ 1 top zero bit) bits, followed by machine id (10 bits), a counter (12 bits) to distinguish IDs within a millisecond. For any given timestamp (unix_ts_ms), the counter starts at 0 and is incremented by 1 for each new Snowflake ID until the timestamp changes. In case the counter overflows, the timestamp field is incremented by 1 and the counter is reset to 0. Function generateSnowflakeID guarantees that the counter field within a timestamp increments monotonically across all function invocations in concurrently running threads and queries.)";
/// Guarantee counter monotonicity within one timestamp across all threads generating Snowflake IDs simultaneously.
struct Data
{
static inline std::atomic<uint64_t> lowest_available_snowflake_id = 0;
SnowflakeComponents reserveRange(size_t input_rows_count)
SnowflakeId reserveRange(size_t input_rows_count)
{
uint64_t available_snowflake_id = lowest_available_snowflake_id.load();
RangeOfSnowflakeIDs range;
SnowflakeIdRange range;
do
{
range = getRangeOfAvailableIDs(toComponents(available_snowflake_id), input_rows_count);
range = getRangeOfAvailableIds(toSnowflakeId(available_snowflake_id), input_rows_count);
}
while (!lowest_available_snowflake_id.compare_exchange_weak(available_snowflake_id, toSnowflakeID(range.end)));
while (!lowest_available_snowflake_id.compare_exchange_weak(available_snowflake_id, fromSnowflakeId(range.end)));
/// if `compare_exhange` failed => another thread updated `lowest_available_snowflake_id` and we should try again
/// completed => range of IDs [begin, end) is reserved, can return the beginning of the range
@ -152,17 +147,17 @@ struct GlobalCounterPolicy
struct ThreadLocalCounterPolicy
{
static constexpr auto name = "generateSnowflakeIDThreadMonotonic";
static constexpr auto doc_description = R"(Generates a Snowflake ID. The generated Snowflake ID contains the current Unix timestamp in milliseconds 41 (+ 1 top zero bit) bits, followed by machine id (10 bits), a counter (12 bits) to distinguish IDs within a millisecond. For any given timestamp (unix_ts_ms), the counter starts at 0 and is incremented by 1 for each new Snowflake ID until the timestamp changes. In case the counter overflows, the timestamp field is incremented by 1 and the counter is reset to 0. This function behaves like generateSnowflakeID but gives no guarantee on counter monotony across different simultaneous requests. Monotonicity within one timestamp is guaranteed only within the same thread calling this function to generate Snowflake IDs.)";
static constexpr auto description = R"(Generates a Snowflake ID. The generated Snowflake ID contains the current Unix timestamp in milliseconds 41 (+ 1 top zero bit) bits, followed by machine id (10 bits), a counter (12 bits) to distinguish IDs within a millisecond. For any given timestamp (unix_ts_ms), the counter starts at 0 and is incremented by 1 for each new Snowflake ID until the timestamp changes. In case the counter overflows, the timestamp field is incremented by 1 and the counter is reset to 0. This function behaves like generateSnowflakeID but gives no guarantee on counter monotony across different simultaneous requests. Monotonicity within one timestamp is guaranteed only within the same thread calling this function to generate Snowflake IDs.)";
/// Guarantee counter monotonicity within one timestamp within the same thread. Faster than GlobalCounterPolicy if a query uses multiple threads.
struct Data
{
static inline thread_local uint64_t lowest_available_snowflake_id = 0;
SnowflakeComponents reserveRange(size_t input_rows_count)
SnowflakeId reserveRange(size_t input_rows_count)
{
RangeOfSnowflakeIDs range = getRangeOfAvailableIDs(toComponents(lowest_available_snowflake_id), input_rows_count);
lowest_available_snowflake_id = toSnowflakeID(range.end);
SnowflakeIdRange range = getRangeOfAvailableIds(toSnowflakeId(lowest_available_snowflake_id), input_rows_count);
lowest_available_snowflake_id = fromSnowflakeId(range.end);
return range.begin;
}
};
@ -188,7 +183,7 @@ public:
{
FunctionArgumentDescriptors mandatory_args;
FunctionArgumentDescriptors optional_args{
{"expr", nullptr, nullptr, "Arbitrary Expression"}
{"expr", nullptr, nullptr, "Arbitrary expression"}
};
validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args);
@ -200,17 +195,18 @@ public:
auto col_res = ColumnVector<UInt64>::create();
typename ColumnVector<UInt64>::Container & vec_to = col_res->getData();
vec_to.resize(input_rows_count);
if (input_rows_count != 0)
{
vec_to.resize(input_rows_count);
typename FillPolicy::Data data;
/// get the begin of available snowflake ids range
SnowflakeComponents snowflake_id = data.reserveRange(input_rows_count);
SnowflakeId snowflake_id = data.reserveRange(input_rows_count);
for (UInt64 & to_row : vec_to)
{
to_row = toSnowflakeID(snowflake_id);
to_row = fromSnowflakeId(snowflake_id);
if (snowflake_id.machine_seq_num++ == max_machine_seq_num)
{
snowflake_id.machine_seq_num = 0;
@ -225,20 +221,20 @@ public:
};
template<typename FillPolicy>
void registerSnowflakeIDGenerator(auto& factory)
void registerSnowflakeIDGenerator(auto & factory)
{
static constexpr auto doc_syntax_format = "{}([expression])";
static constexpr auto example_format = "SELECT {}()";
static constexpr auto multiple_example_format = "SELECT {f}(1), {f}(2)";
FunctionDocumentation::Description doc_description = FillPolicy::doc_description;
FunctionDocumentation::Syntax doc_syntax = fmt::format(doc_syntax_format, FillPolicy::name);
FunctionDocumentation::Arguments doc_arguments = {{"expression", "The expression is used to bypass common subexpression elimination if the function is called multiple times in a query but otherwise ignored. Optional."}};
FunctionDocumentation::ReturnedValue doc_returned_value = "A value of type UInt64";
FunctionDocumentation::Examples doc_examples = {{"uuid", fmt::format(example_format, FillPolicy::name), ""}, {"multiple", fmt::format(multiple_example_format, fmt::arg("f", FillPolicy::name)), ""}};
FunctionDocumentation::Categories doc_categories = {"Snowflake ID"};
FunctionDocumentation::Description description = FillPolicy::description;
FunctionDocumentation::Syntax syntax = fmt::format(doc_syntax_format, FillPolicy::name);
FunctionDocumentation::Arguments arguments = {{"expression", "The expression is used to bypass common subexpression elimination if the function is called multiple times in a query but otherwise ignored. Optional."}};
FunctionDocumentation::ReturnedValue returned_value = "A value of type UInt64";
FunctionDocumentation::Examples examples = {{"single", fmt::format(example_format, FillPolicy::name), ""}, {"multiple", fmt::format(multiple_example_format, fmt::arg("f", FillPolicy::name)), ""}};
FunctionDocumentation::Categories categories = {"Snowflake ID"};
factory.template registerFunction<FunctionGenerateSnowflakeID<FillPolicy>>({doc_description, doc_syntax, doc_arguments, doc_returned_value, doc_examples, doc_categories}, FunctionFactory::CaseInsensitive);
factory.template registerFunction<FunctionGenerateSnowflakeID<FillPolicy>>({description, syntax, arguments, returned_value, examples, categories}, FunctionFactory::CaseInsensitive);
}
REGISTER_FUNCTION(GenerateSnowflakeID)

View File

@ -76,7 +76,7 @@ void setVariant(UUID & uuid)
struct FillAllRandomPolicy
{
static constexpr auto name = "generateUUIDv7NonMonotonic";
static constexpr auto doc_description = R"(Generates a UUID of version 7. The generated UUID contains the current Unix timestamp in milliseconds (48 bits), followed by version "7" (4 bits), and a random field (74 bit, including a 2-bit variant field "2") to distinguish UUIDs within a millisecond. This function is the fastest generateUUIDv7* function but it gives no monotonicity guarantees within a timestamp.)";
static constexpr auto description = R"(Generates a UUID of version 7. The generated UUID contains the current Unix timestamp in milliseconds (48 bits), followed by version "7" (4 bits), and a random field (74 bit, including a 2-bit variant field "2") to distinguish UUIDs within a millisecond. This function is the fastest generateUUIDv7* function but it gives no monotonicity guarantees within a timestamp.)";
struct Data
{
void generate(UUID & uuid, uint64_t ts)
@ -136,7 +136,7 @@ struct CounterFields
struct GlobalCounterPolicy
{
static constexpr auto name = "generateUUIDv7";
static constexpr auto doc_description = R"(Generates a UUID of version 7. The generated UUID contains the current Unix timestamp in milliseconds (48 bits), followed by version "7" (4 bits), a counter (42 bit, including a variant field "2", 2 bit) to distinguish UUIDs within a millisecond, and a random field (32 bits). For any given timestamp (unix_ts_ms), the counter starts at a random value and is incremented by 1 for each new UUID until the timestamp changes. In case the counter overflows, the timestamp field is incremented by 1 and the counter is reset to a random new start value. Function generateUUIDv7 guarantees that the counter field within a timestamp increments monotonically across all function invocations in concurrently running threads and queries.)";
static constexpr auto description = R"(Generates a UUID of version 7. The generated UUID contains the current Unix timestamp in milliseconds (48 bits), followed by version "7" (4 bits), a counter (42 bit, including a variant field "2", 2 bit) to distinguish UUIDs within a millisecond, and a random field (32 bits). For any given timestamp (unix_ts_ms), the counter starts at a random value and is incremented by 1 for each new UUID until the timestamp changes. In case the counter overflows, the timestamp field is incremented by 1 and the counter is reset to a random new start value. Function generateUUIDv7 guarantees that the counter field within a timestamp increments monotonically across all function invocations in concurrently running threads and queries.)";
/// Guarantee counter monotonicity within one timestamp across all threads generating UUIDv7 simultaneously.
struct Data
@ -159,7 +159,7 @@ struct GlobalCounterPolicy
struct ThreadLocalCounterPolicy
{
static constexpr auto name = "generateUUIDv7ThreadMonotonic";
static constexpr auto doc_description = R"(Generates a UUID of version 7. The generated UUID contains the current Unix timestamp in milliseconds (48 bits), followed by version "7" (4 bits), a counter (42 bit, including a variant field "2", 2 bit) to distinguish UUIDs within a millisecond, and a random field (32 bits). For any given timestamp (unix_ts_ms), the counter starts at a random value and is incremented by 1 for each new UUID until the timestamp changes. In case the counter overflows, the timestamp field is incremented by 1 and the counter is reset to a random new start value. This function behaves like generateUUIDv7 but gives no guarantee on counter monotony across different simultaneous requests. Monotonicity within one timestamp is guaranteed only within the same thread calling this function to generate UUIDs.)";
static constexpr auto description = R"(Generates a UUID of version 7. The generated UUID contains the current Unix timestamp in milliseconds (48 bits), followed by version "7" (4 bits), a counter (42 bit, including a variant field "2", 2 bit) to distinguish UUIDs within a millisecond, and a random field (32 bits). For any given timestamp (unix_ts_ms), the counter starts at a random value and is incremented by 1 for each new UUID until the timestamp changes. In case the counter overflows, the timestamp field is incremented by 1 and the counter is reset to a random new start value. This function behaves like generateUUIDv7 but gives no guarantee on counter monotony across different simultaneous requests. Monotonicity within one timestamp is guaranteed only within the same thread calling this function to generate UUIDs.)";
/// Guarantee counter monotonicity within one timestamp within the same thread. Faster than GlobalCounterPolicy if a query uses multiple threads.
struct Data
@ -186,7 +186,6 @@ class FunctionGenerateUUIDv7Base : public IFunction, public FillPolicy
{
public:
String getName() const final { return FillPolicy::name; }
size_t getNumberOfArguments() const final { return 0; }
bool isDeterministic() const override { return false; }
bool isDeterministicInScopeOfQuery() const final { return false; }
@ -198,7 +197,7 @@ public:
{
FunctionArgumentDescriptors mandatory_args;
FunctionArgumentDescriptors optional_args{
{"expr", nullptr, nullptr, "Arbitrary Expression"}
{"expr", nullptr, nullptr, "Arbitrary expression"}
};
validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args);
@ -264,20 +263,20 @@ private:
};
template<typename FillPolicy>
void registerUUIDv7Generator(auto& factory)
void registerUUIDv7Generator(auto & factory)
{
static constexpr auto doc_syntax_format = "{}([expression])";
static constexpr auto example_format = "SELECT {}()";
static constexpr auto multiple_example_format = "SELECT {f}(1), {f}(2)";
FunctionDocumentation::Description doc_description = FillPolicy::doc_description;
FunctionDocumentation::Syntax doc_syntax = fmt::format(doc_syntax_format, FillPolicy::name);
FunctionDocumentation::Arguments doc_arguments = {{"expression", "The expression is used to bypass common subexpression elimination if the function is called multiple times in a query but otherwise ignored. Optional."}};
FunctionDocumentation::ReturnedValue doc_returned_value = "A value of type UUID version 7.";
FunctionDocumentation::Examples doc_examples = {{"uuid", fmt::format(example_format, FillPolicy::name), ""}, {"multiple", fmt::format(multiple_example_format, fmt::arg("f", FillPolicy::name)), ""}};
FunctionDocumentation::Categories doc_categories = {"UUID"};
FunctionDocumentation::Description description = FillPolicy::description;
FunctionDocumentation::Syntax syntax = fmt::format(doc_syntax_format, FillPolicy::name);
FunctionDocumentation::Arguments arguments = {{"expression", "The expression is used to bypass common subexpression elimination if the function is called multiple times in a query but otherwise ignored. Optional."}};
FunctionDocumentation::ReturnedValue returned_value = "A value of type UUID version 7.";
FunctionDocumentation::Examples examples = {{"single", fmt::format(example_format, FillPolicy::name), ""}, {"multiple", fmt::format(multiple_example_format, fmt::arg("f", FillPolicy::name)), ""}};
FunctionDocumentation::Categories categories = {"UUID"};
factory.template registerFunction<FunctionGenerateUUIDv7Base<FillPolicy>>({doc_description, doc_syntax, doc_arguments, doc_returned_value, doc_examples, doc_categories}, FunctionFactory::CaseInsensitive);
factory.template registerFunction<FunctionGenerateUUIDv7Base<FillPolicy>>({description, syntax, arguments, returned_value, examples, categories}, FunctionFactory::CaseInsensitive);
}
REGISTER_FUNCTION(GenerateUUIDv7)