Mirror of https://github.com/ClickHouse/ClickHouse.git
Cosmetics, pt. I
This commit is contained in:
parent d72cd45bd8
commit 1293a0f795
@@ -18,8 +18,7 @@ namespace ErrorCodes
 namespace
 {

-/*
-Snowflake ID
+/* Snowflake ID
 https://en.wikipedia.org/wiki/Snowflake_ID

  0                   1                   2                   3
@@ -30,35 +29,34 @@ namespace
 |                   |   machine_id    |    machine_seq_num    |
 ├─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┤

-- The first 41 (+ 1 top zero bit) bits is timestamp in Unix time milliseconds
-- The middle 10 bits are the machine ID.
-- The last 12 bits decode to number of ids processed by the machine at the given millisecond.
+- The first 41 (+ 1 top zero bit) bits is the timestamp (milliseconds since Unix epoch 1 Jan 1970)
+- The middle 10 bits are the machine ID
+- The last 12 bits are a counter to disambiguate multiple snowflakeIDs generated within the same millisecond by different processes
 */

-constexpr auto timestamp_size = 41;
-constexpr auto machine_id_size = 10;
-constexpr auto machine_seq_num_size = 12;
+constexpr auto timestamp_bits_count = 41;
+constexpr auto machine_id_bits_count = 10;
+constexpr auto machine_seq_num_bits_count = 12;

-constexpr int64_t timestamp_mask = ((1LL << timestamp_size) - 1) << (machine_id_size + machine_seq_num_size);
-constexpr int64_t machine_id_mask = ((1LL << machine_id_size) - 1) << machine_seq_num_size;
-constexpr int64_t machine_seq_num_mask = (1LL << machine_seq_num_size) - 1;
+constexpr int64_t timestamp_mask = ((1LL << timestamp_bits_count) - 1) << (machine_id_bits_count + machine_seq_num_bits_count);
+constexpr int64_t machine_id_mask = ((1LL << machine_id_bits_count) - 1) << machine_seq_num_bits_count;
+constexpr int64_t machine_seq_num_mask = (1LL << machine_seq_num_bits_count) - 1;
 constexpr int64_t max_machine_seq_num = machine_seq_num_mask;

 Int64 getMachineID()
 {
-    auto serverUUID = ServerUUID::get();
-
-    // hash serverUUID into 64 bits
-    Int64 h = UUIDHelpers::getHighBytes(serverUUID);
-    Int64 l = UUIDHelpers::getLowBytes(serverUUID);
-    return ((h * 11) ^ (l * 17)) & machine_id_mask;
+    UUID server_uuid = ServerUUID::get();
+    /// hash into 64 bits
+    UInt64 hi = UUIDHelpers::getHighBytes(server_uuid);
+    UInt64 lo = UUIDHelpers::getLowBytes(server_uuid);
+    return ((hi * 11) ^ (lo * 17)) & machine_id_mask;
 }

 Int64 getTimestamp()
 {
-    const auto tm_point = std::chrono::system_clock::now();
-    return std::chrono::duration_cast<std::chrono::milliseconds>(
-        tm_point.time_since_epoch()).count() & ((1LL << timestamp_size) - 1);
+    auto now = std::chrono::system_clock::now();
+    auto ticks_since_epoch = std::chrono::duration_cast<std::chrono::milliseconds>(now.time_since_epoch()).count();
+    return ticks_since_epoch & ((1LL << timestamp_bits_count) - 1);
 }

 }
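Not part of the commit, but as a reading aid for the renamed constants: a minimal standalone sketch of how the three fields are packed into one 64-bit value and recovered with the masks above. It uses plain int64_t instead of ClickHouse's Int64, and the helper pack() is invented for illustration. Note that getMachineID() above already returns the machine id shifted into its final bit position, which is why executeImpl later ORs it in without a shift.

#include <cassert>
#include <cstdint>

constexpr auto timestamp_bits_count = 41;
constexpr auto machine_id_bits_count = 10;
constexpr auto machine_seq_num_bits_count = 12;

constexpr int64_t timestamp_mask = ((1LL << timestamp_bits_count) - 1) << (machine_id_bits_count + machine_seq_num_bits_count);
constexpr int64_t machine_id_mask = ((1LL << machine_id_bits_count) - 1) << machine_seq_num_bits_count;
constexpr int64_t machine_seq_num_mask = (1LL << machine_seq_num_bits_count) - 1;

/// Pack an (unshifted) timestamp, machine id and sequence number into one 64-bit ID.
constexpr int64_t pack(int64_t timestamp, int64_t machine_id, int64_t seq_num)
{
    return (timestamp << (machine_id_bits_count + machine_seq_num_bits_count))
         | (machine_id << machine_seq_num_bits_count)
         | seq_num;
}

int main()
{
    const int64_t id = pack(1234567890123LL, 42, 7);

    /// The masks recover each field.
    assert(((id & timestamp_mask) >> (machine_id_bits_count + machine_seq_num_bits_count)) == 1234567890123LL);
    assert(((id & machine_id_mask) >> machine_seq_num_bits_count) == 42);
    assert((id & machine_seq_num_mask) == 7);
    return 0;
}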
@@ -66,16 +64,11 @@ Int64 getTimestamp()
 class FunctionSnowflakeID : public IFunction
 {
 private:
-    mutable std::atomic<Int64> lowest_available_snowflake_id{0};
-    // 1 atomic value because we don't want to use mutex
+    mutable std::atomic<Int64> lowest_available_snowflake_id = 0; /// atomic to avoid a mutex

 public:
     static constexpr auto name = "generateSnowflakeID";
-
-    static FunctionPtr create(ContextPtr /*context*/)
-    {
-        return std::make_shared<FunctionSnowflakeID>();
-    }
+    static FunctionPtr create(ContextPtr /*context*/) { return std::make_shared<FunctionSnowflakeID>(); }

     String getName() const override { return name; }
     size_t getNumberOfArguments() const override { return 0; }
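Not part of the commit, but the reworded comment points at a design choice worth spelling out: executeImpl is a const member function, so any per-object state it updates has to be mutable, and making that state a std::atomic lets concurrent query threads advance it without a mutex. A tiny sketch of the pattern outside ClickHouse follows; the Counter class and reserve() are invented names. The real function cannot get away with a plain fetch_add because the stored value also has to be reconciled with the wall clock, hence the compare-exchange loop in the next hunks.

#include <atomic>
#include <cstdint>

class Counter
{
private:
    /// mutable: reserve() below is const; atomic: safe to call from many threads without a mutex.
    mutable std::atomic<int64_t> lowest_available{0};

public:
    /// Reserve `n` consecutive values and return the first of them.
    int64_t reserve(int64_t n) const { return lowest_available.fetch_add(n); }
};

int main()
{
    Counter c;
    return static_cast<int>(c.reserve(10));   /// returns 0; the next caller starts at 10
}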
@@ -95,31 +88,34 @@ public:
         return std::make_shared<DataTypeInt64>();
     }

+
     ColumnPtr executeImpl(const ColumnsWithTypeAndName & /*arguments*/, const DataTypePtr &, size_t input_rows_count) const override
     {
         auto col_res = ColumnVector<Int64>::create();
         typename ColumnVector<Int64>::Container & vec_to = col_res->getData();
-        Int64 size64 = static_cast<Int64>(input_rows_count);
         vec_to.resize(input_rows_count);

         if (input_rows_count == 0) {
             return col_res;
         }

-        Int64 machine_id = getMachineID();
+        const Int64 machine_id = getMachineID();
         Int64 current_timestamp = getTimestamp();
         Int64 current_machine_seq_num;

-        Int64 available_id, next_available_id;
+        Int64 available_snowflake_id, next_available_snowflake_id;

+        const Int64 size64 = static_cast<Int64>(input_rows_count);
+
         do
         {
-            available_id = lowest_available_snowflake_id.load();
-            Int64 available_timestamp = (available_id & timestamp_mask) >> (machine_id_size + machine_seq_num_size);
-            Int64 available_machine_seq_num = available_id & machine_seq_num_mask;
+            available_snowflake_id = lowest_available_snowflake_id.load();
+            const Int64 available_timestamp = (available_snowflake_id & timestamp_mask) >> (machine_id_bits_count + machine_seq_num_bits_count);
+            const Int64 available_machine_seq_num = available_snowflake_id & machine_seq_num_mask;

             if (current_timestamp > available_timestamp)
             {
+                /// handle overflow
                 current_machine_seq_num = 0;
             }
             else
@@ -128,24 +124,23 @@ public:
                 current_machine_seq_num = available_machine_seq_num;
             }

-            // calculate new `lowest_available_snowflake_id`
+            /// calculate new lowest_available_snowflake_id
+            const Int64 seq_nums_in_current_timestamp_left = (max_machine_seq_num - current_machine_seq_num + 1);
             Int64 new_timestamp;
-            Int64 seq_nums_in_current_timestamp_left = (max_machine_seq_num - current_machine_seq_num + 1);
-            if (size64 >= seq_nums_in_current_timestamp_left) {
+            if (size64 >= seq_nums_in_current_timestamp_left)
                 new_timestamp = current_timestamp + 1 + (size64 - seq_nums_in_current_timestamp_left) / max_machine_seq_num;
-            } else {
+            else
                 new_timestamp = current_timestamp;
-            }
-            Int64 new_machine_seq_num = (current_machine_seq_num + size64) & machine_seq_num_mask;
-            next_available_id = (new_timestamp << (machine_id_size + machine_seq_num_size)) | machine_id | new_machine_seq_num;
+            const Int64 new_machine_seq_num = (current_machine_seq_num + size64) & machine_seq_num_mask;
+            next_available_snowflake_id = (new_timestamp << (machine_id_bits_count + machine_seq_num_bits_count)) | machine_id | new_machine_seq_num;
         }
-        while (!lowest_available_snowflake_id.compare_exchange_strong(available_id, next_available_id));
-        // failed CAS => another thread updated `lowest_available_snowflake_id`
-        // successful CAS => we have our range of exclusive values
+        while (!lowest_available_snowflake_id.compare_exchange_strong(available_snowflake_id, next_available_snowflake_id));
+        /// failed CAS => another thread updated `lowest_available_snowflake_id`
+        /// successful CAS => we have our range of exclusive values

-        for (Int64 & el : vec_to)
+        for (Int64 & to_row : vec_to)
         {
-            el = (current_timestamp << (machine_id_size + machine_seq_num_size)) | machine_id | current_machine_seq_num;
+            to_row = (current_timestamp << (machine_id_bits_count + machine_seq_num_bits_count)) | machine_id | current_machine_seq_num;
             if (current_machine_seq_num++ == max_machine_seq_num)
             {
                 current_machine_seq_num = 0;
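Not part of the commit: to see the reservation loop in isolation, a self-contained sketch of the same technique using plain std::atomic, std::chrono and int64_t in place of the ClickHouse types; reserveRange() and its signature are invented for illustration. Each caller reads the shared low-water mark, works out where its block of count IDs would end (spilling into later millisecond slots if the 12-bit counter overflows), and publishes the new mark with compare_exchange_strong, retrying if another thread won the race in the meantime.

#include <atomic>
#include <chrono>
#include <cstdint>
#include <utility>

constexpr auto timestamp_bits_count = 41;
constexpr auto machine_id_bits_count = 10;
constexpr auto machine_seq_num_bits_count = 12;
constexpr int64_t timestamp_mask = ((1LL << timestamp_bits_count) - 1) << (machine_id_bits_count + machine_seq_num_bits_count);
constexpr int64_t machine_seq_num_mask = (1LL << machine_seq_num_bits_count) - 1;
constexpr int64_t max_machine_seq_num = machine_seq_num_mask;

std::atomic<int64_t> lowest_available_snowflake_id{0};

int64_t getTimestamp()
{
    auto now = std::chrono::system_clock::now();
    auto ticks = std::chrono::duration_cast<std::chrono::milliseconds>(now.time_since_epoch()).count();
    return ticks & ((1LL << timestamp_bits_count) - 1);
}

/// Reserve `count` consecutive IDs for this thread; returns the (timestamp, seq_num) to start counting from.
/// `machine_id` is expected pre-shifted into bits 12..21, as getMachineID() above returns it.
std::pair<int64_t, int64_t> reserveRange(int64_t count, int64_t machine_id)
{
    const int64_t current_timestamp = getTimestamp();
    int64_t current_seq_num;
    int64_t available, next_available;
    do
    {
        available = lowest_available_snowflake_id.load();
        const int64_t available_timestamp = (available & timestamp_mask) >> (machine_id_bits_count + machine_seq_num_bits_count);
        const int64_t available_seq_num = available & machine_seq_num_mask;

        /// Clock moved past the stored timestamp -> the 12-bit counter restarts at 0,
        /// otherwise continue counting from the stored position.
        current_seq_num = (current_timestamp > available_timestamp) ? 0 : available_seq_num;

        /// Where does a range of `count` IDs end? If it overflows the counter,
        /// spill over into later millisecond slots.
        const int64_t seq_nums_left = max_machine_seq_num - current_seq_num + 1;
        const int64_t new_timestamp = (count >= seq_nums_left)
            ? current_timestamp + 1 + (count - seq_nums_left) / max_machine_seq_num
            : current_timestamp;
        const int64_t new_seq_num = (current_seq_num + count) & machine_seq_num_mask;

        next_available = (new_timestamp << (machine_id_bits_count + machine_seq_num_bits_count)) | machine_id | new_seq_num;
    }
    while (!lowest_available_snowflake_id.compare_exchange_strong(available, next_available));

    /// Successful CAS: the range starting at (current_timestamp, current_seq_num) is exclusively ours.
    return {current_timestamp, current_seq_num};
}

int main()
{
    const int64_t machine_id = 42LL << machine_seq_num_bits_count;   /// pre-shifted, as in getMachineID()
    auto [ts, seq] = reserveRange(10, machine_id);
    (void)ts; (void)seq;
    return 0;
}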
@@ -163,10 +158,10 @@ REGISTER_FUNCTION(GenerateSnowflakeID)
     factory.registerFunction<FunctionSnowflakeID>(FunctionDocumentation
     {
         .description=R"(
-Generates Snowflake ID -- unique identificators contains:
-- The first 41 (+ 1 top zero bit) bits is timestamp in Unix time milliseconds
-- The middle 10 bits are the machine ID.
-- The last 12 bits decode to number of ids processed by the machine at the given millisecond.
+Generates a SnowflakeID -- a unique identifier containing:
+- The first 41 (+ 1 top zero bit) bits is the timestamp (milliseconds since Unix epoch 1 Jan 1970)
+- The middle 10 bits are the machine ID
+- The last 12 bits are a counter to disambiguate multiple snowflakeIDs generated within the same millisecond by different processes

 In case the number of ids processed overflows, the timestamp field is incremented by 1 and the counter is reset to 0.
 This function guarantees strict monotony on 1 machine and differences in values obtained on different machines.
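Not part of the commit: a concrete instance of the overflow rule stated in the description, with numbers chosen purely for illustration. With a 12-bit counter the maximum sequence number is 4095; if the counter stands at 4094 and a block of 5 IDs is requested, only 2 sequence numbers are left in the current millisecond, so the timestamp field advances by one and the counter wraps to 3.

#include <cstdint>

constexpr int64_t max_machine_seq_num = (1LL << 12) - 1;   /// 4095, also used as the counter mask

constexpr int64_t current_machine_seq_num = 4094;          /// where the counter currently stands
constexpr int64_t size64 = 5;                              /// IDs requested in this batch
constexpr int64_t seq_nums_left = max_machine_seq_num - current_machine_seq_num + 1;   /// 2

/// Timestamp advances by one millisecond, counter wraps around.
static_assert(1 + (size64 - seq_nums_left) / max_machine_seq_num == 1, "timestamp += 1");
static_assert(((current_machine_seq_num + size64) & max_machine_seq_num) == 3, "counter wraps to 3");

int main() { return 0; }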
|