Cosmetics, pt. I

This commit is contained in:
Robert Schulze 2024-05-19 18:47:58 +00:00
parent d72cd45bd8
commit 1293a0f795
No known key found for this signature in database
GPG Key ID: 26703B55FB13728A

View File

@ -18,8 +18,7 @@ namespace ErrorCodes
namespace namespace
{ {
/* /* Snowflake ID
Snowflake ID
https://en.wikipedia.org/wiki/Snowflake_ID https://en.wikipedia.org/wiki/Snowflake_ID
0 1 2 3 0 1 2 3
@ -30,35 +29,34 @@ namespace
| | machine_id | machine_seq_num | | | machine_id | machine_seq_num |
- The first 41 (+ 1 top zero bit) bits is timestamp in Unix time milliseconds - The first 41 (+ 1 top zero bit) bits is the timestamp (millisecond since Unix epoch 1 Jan 1970)
- The middle 10 bits are the machine ID. - The middle 10 bits are the machine ID
- The last 12 bits decode to number of ids processed by the machine at the given millisecond. - The last 12 bits are a counter to disambiguate multiple snowflakeIDs generated within the same millisecond by differen processes
*/ */
constexpr auto timestamp_size = 41; constexpr auto timestamp_bits_count = 41;
constexpr auto machine_id_size = 10; constexpr auto machine_id_bits_count = 10;
constexpr auto machine_seq_num_size = 12; constexpr auto machine_seq_num_bits_count = 12;
constexpr int64_t timestamp_mask = ((1LL << timestamp_size) - 1) << (machine_id_size + machine_seq_num_size); constexpr int64_t timestamp_mask = ((1LL << timestamp_bits_count) - 1) << (machine_id_bits_count + machine_seq_num_bits_count);
constexpr int64_t machine_id_mask = ((1LL << machine_id_size) - 1) << machine_seq_num_size; constexpr int64_t machine_id_mask = ((1LL << machine_id_bits_count) - 1) << machine_seq_num_bits_count;
constexpr int64_t machine_seq_num_mask = (1LL << machine_seq_num_size) - 1; constexpr int64_t machine_seq_num_mask = (1LL << machine_seq_num_bits_count) - 1;
constexpr int64_t max_machine_seq_num = machine_seq_num_mask; constexpr int64_t max_machine_seq_num = machine_seq_num_mask;
Int64 getMachineID() Int64 getMachineID()
{ {
auto serverUUID = ServerUUID::get(); UUID server_uuid = ServerUUID::get();
/// hash into 64 bits
// hash serverUUID into 64 bits UInt64 hi = UUIDHelpers::getHighBytes(server_uuid);
Int64 h = UUIDHelpers::getHighBytes(serverUUID); UInt64 lo = UUIDHelpers::getLowBytes(server_uuid);
Int64 l = UUIDHelpers::getLowBytes(serverUUID); return ((hi * 11) ^ (lo * 17)) & machine_id_mask;
return ((h * 11) ^ (l * 17)) & machine_id_mask;
} }
Int64 getTimestamp() Int64 getTimestamp()
{ {
const auto tm_point = std::chrono::system_clock::now(); auto now = std::chrono::system_clock::now();
return std::chrono::duration_cast<std::chrono::milliseconds>( auto ticks_since_epoch = std::chrono::duration_cast<std::chrono::milliseconds>(now.time_since_epoch()).count();
tm_point.time_since_epoch()).count() & ((1LL << timestamp_size) - 1); return ticks_since_epoch & ((1LL << timestamp_bits_count) - 1);
} }
} }
@ -66,16 +64,11 @@ Int64 getTimestamp()
class FunctionSnowflakeID : public IFunction class FunctionSnowflakeID : public IFunction
{ {
private: private:
mutable std::atomic<Int64> lowest_available_snowflake_id{0}; mutable std::atomic<Int64> lowest_available_snowflake_id = 0; /// atomic to avoid a mutex
// 1 atomic value because we don't want to use mutex
public: public:
static constexpr auto name = "generateSnowflakeID"; static constexpr auto name = "generateSnowflakeID";
static FunctionPtr create(ContextPtr /*context*/) { return std::make_shared<FunctionSnowflakeID>(); }
static FunctionPtr create(ContextPtr /*context*/)
{
return std::make_shared<FunctionSnowflakeID>();
}
String getName() const override { return name; } String getName() const override { return name; }
size_t getNumberOfArguments() const override { return 0; } size_t getNumberOfArguments() const override { return 0; }
@ -95,31 +88,34 @@ public:
return std::make_shared<DataTypeInt64>(); return std::make_shared<DataTypeInt64>();
} }
ColumnPtr executeImpl(const ColumnsWithTypeAndName & /*arguments*/, const DataTypePtr &, size_t input_rows_count) const override ColumnPtr executeImpl(const ColumnsWithTypeAndName & /*arguments*/, const DataTypePtr &, size_t input_rows_count) const override
{ {
auto col_res = ColumnVector<Int64>::create(); auto col_res = ColumnVector<Int64>::create();
typename ColumnVector<Int64>::Container & vec_to = col_res->getData(); typename ColumnVector<Int64>::Container & vec_to = col_res->getData();
Int64 size64 = static_cast<Int64>(input_rows_count);
vec_to.resize(input_rows_count); vec_to.resize(input_rows_count);
if (input_rows_count == 0) { if (input_rows_count == 0) {
return col_res; return col_res;
} }
Int64 machine_id = getMachineID(); const Int64 machine_id = getMachineID();
Int64 current_timestamp = getTimestamp(); Int64 current_timestamp = getTimestamp();
Int64 current_machine_seq_num; Int64 current_machine_seq_num;
Int64 available_id, next_available_id; Int64 available_snowflake_id, next_available_snowflake_id;
const Int64 size64 = static_cast<Int64>(input_rows_count);
do do
{ {
available_id = lowest_available_snowflake_id.load(); available_snowflake_id = lowest_available_snowflake_id.load();
Int64 available_timestamp = (available_id & timestamp_mask) >> (machine_id_size + machine_seq_num_size); const Int64 available_timestamp = (available_snowflake_id & timestamp_mask) >> (machine_id_bits_count + machine_seq_num_bits_count);
Int64 available_machine_seq_num = available_id & machine_seq_num_mask; const Int64 available_machine_seq_num = available_snowflake_id & machine_seq_num_mask;
if (current_timestamp > available_timestamp) if (current_timestamp > available_timestamp)
{ {
/// handle overflow
current_machine_seq_num = 0; current_machine_seq_num = 0;
} }
else else
@ -128,24 +124,23 @@ public:
current_machine_seq_num = available_machine_seq_num; current_machine_seq_num = available_machine_seq_num;
} }
// calculate new `lowest_available_snowflake_id` /// calculate new lowest_available_snowflake_id
const Int64 seq_nums_in_current_timestamp_left = (max_machine_seq_num - current_machine_seq_num + 1);
Int64 new_timestamp; Int64 new_timestamp;
Int64 seq_nums_in_current_timestamp_left = (max_machine_seq_num - current_machine_seq_num + 1); if (size64 >= seq_nums_in_current_timestamp_left)
if (size64 >= seq_nums_in_current_timestamp_left) {
new_timestamp = current_timestamp + 1 + (size64 - seq_nums_in_current_timestamp_left) / max_machine_seq_num; new_timestamp = current_timestamp + 1 + (size64 - seq_nums_in_current_timestamp_left) / max_machine_seq_num;
} else { else
new_timestamp = current_timestamp; new_timestamp = current_timestamp;
} const Int64 new_machine_seq_num = (current_machine_seq_num + size64) & machine_seq_num_mask;
Int64 new_machine_seq_num = (current_machine_seq_num + size64) & machine_seq_num_mask; next_available_snowflake_id = (new_timestamp << (machine_id_bits_count + machine_seq_num_bits_count)) | machine_id | new_machine_seq_num;
next_available_id = (new_timestamp << (machine_id_size + machine_seq_num_size)) | machine_id | new_machine_seq_num;
} }
while (!lowest_available_snowflake_id.compare_exchange_strong(available_id, next_available_id)); while (!lowest_available_snowflake_id.compare_exchange_strong(available_snowflake_id, next_available_snowflake_id));
// failed CAS => another thread updated `lowest_available_snowflake_id` /// failed CAS => another thread updated `lowest_available_snowflake_id`
// successful CAS => we have our range of exclusive values /// successful CAS => we have our range of exclusive values
for (Int64 & el : vec_to) for (Int64 & to_row : vec_to)
{ {
el = (current_timestamp << (machine_id_size + machine_seq_num_size)) | machine_id | current_machine_seq_num; to_row = (current_timestamp << (machine_id_bits_count + machine_seq_num_bits_count)) | machine_id | current_machine_seq_num;
if (current_machine_seq_num++ == max_machine_seq_num) if (current_machine_seq_num++ == max_machine_seq_num)
{ {
current_machine_seq_num = 0; current_machine_seq_num = 0;
@ -163,10 +158,10 @@ REGISTER_FUNCTION(GenerateSnowflakeID)
factory.registerFunction<FunctionSnowflakeID>(FunctionDocumentation factory.registerFunction<FunctionSnowflakeID>(FunctionDocumentation
{ {
.description=R"( .description=R"(
Generates Snowflake ID -- unique identificators contains: Generates a SnowflakeID -- unique identificators contains:
- The first 41 (+ 1 top zero bit) bits is timestamp in Unix time milliseconds - The first 41 (+ 1 top zero bit) bits is the timestamp (millisecond since Unix epoch 1 Jan 1970)
- The middle 10 bits are the machine ID. - The middle 10 bits are the machine ID
- The last 12 bits decode to number of ids processed by the machine at the given millisecond. - The last 12 bits are a counter to disambiguate multiple snowflakeIDs generated within the same millisecond by differen processes
In case the number of ids processed overflows, the timestamp field is incremented by 1 and the counter is reset to 0. In case the number of ids processed overflows, the timestamp field is incremented by 1 and the counter is reset to 0.
This function guarantees strict monotony on 1 machine and differences in values obtained on different machines. This function guarantees strict monotony on 1 machine and differences in values obtained on different machines.