Merge branch 'master' into chesema-merge-wb
commit e9fff481fa
@@ -74,6 +74,7 @@ elseif (ARCH_AARCH64)
#        introduced as optional, either in v8.2 [7] or in v8.4 [8].
# rcpc:  Load-Acquire RCpc Register. Better support of release/acquire of atomics. Good for allocators and high contention code.
#        Optional in v8.2, mandatory in v8.3 [9]. Supported in Graviton >=2, Azure and GCP instances.
# bf16:  Bfloat16, a half-precision floating point format developed by Google Brain. Optional in v8.2, mandatory in v8.6.
#
# [1] https://github.com/aws/aws-graviton-getting-started/blob/main/c-c%2B%2B.md
# [2] https://community.arm.com/arm-community-blogs/b/tools-software-ides-blog/posts/making-the-most-of-the-arm-architecture-in-gcc-10
@@ -4489,9 +4489,9 @@ Using replacement fields, you can define a pattern for the resulting string.

| k | clockhour of day (1~24) | number | 24 |
| m | minute of hour | number | 30 |
| s | second of minute | number | 55 |
| S | fraction of second (not supported yet) | number | 978 |
| z | time zone (short name not supported yet) | text | Pacific Standard Time; PST |
| Z | time zone offset/id (not supported yet) | zone | -0800; -08:00; America/Los_Angeles |
| S | fraction of second | number | 978 |
| z | time zone | text | Eastern Standard Time; EST |
| Z | time zone offset | zone | -0800; -0812 |
| ' | escape for text | delimiter | |
| '' | single quote | literal | ' |
@@ -6867,9 +6867,53 @@ Same as for [parseDateTimeInJodaSyntax](#parsedatetimeinjodasyntax) except that

Same as for [parseDateTimeInJodaSyntax](#parsedatetimeinjodasyntax) except that it returns `NULL` when it encounters a date format that cannot be processed.

## parseDateTime64

Converts a [String](../data-types/string.md) to [DateTime64](../data-types/datetime64.md) according to a [MySQL format string](https://dev.mysql.com/doc/refman/8.0/en/date-and-time-functions.html#function_date-format).

**Syntax**

``` sql
parseDateTime64(str[, format[, timezone]])
```

**Arguments**

- `str` — The String to be parsed.
- `format` — The format string. Optional. `%Y-%m-%d %H:%i:%s.%f` if not specified.
- `timezone` — [Timezone](/docs/en/operations/server-configuration-parameters/settings.md#timezone). Optional.

**Returned value(s)**

Returns a [DateTime64](../data-types/datetime64.md) value parsed from the input string according to a MySQL-style format string.
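**Example**

A minimal illustration using the default format (a sketch; with the MySQL syntax the scale of the result is fixed at 6, so `%f` expects six fractional digits, and the rendering depends on the session timezone):

``` sql
SELECT parseDateTime64('2024-11-21 15:12:02.123456');
```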
## parseDateTime64OrZero

Same as for [parseDateTime64](#parsedatetime64) except that it returns a zero date when it encounters a date format that cannot be processed.

## parseDateTime64OrNull

Same as for [parseDateTime64](#parsedatetime64) except that it returns `NULL` when it encounters a date format that cannot be processed.
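**Example**

An illustrative contrast of the two fallback variants on unparsable input:

``` sql
SELECT parseDateTime64OrZero('invalid', '%Y-%m-%d %H:%i:%s.%f'); -- returns the zero date
SELECT parseDateTime64OrNull('invalid', '%Y-%m-%d %H:%i:%s.%f'); -- returns NULL
```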
## parseDateTime64InJodaSyntax

Similar to [parseDateTimeInJodaSyntax](#parsedatetimeinjodasyntax), but it returns a value of type [DateTime64](../data-types/datetime64.md).
Converts a [String](../data-types/string.md) to [DateTime64](../data-types/datetime64.md) according to a [Joda format string](https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html).

**Syntax**

``` sql
parseDateTime64InJodaSyntax(str[, format[, timezone]])
```

**Arguments**

- `str` — The String to be parsed.
- `format` — The format string. Optional. `yyyy-MM-dd HH:mm:ss` if not specified.
- `timezone` — [Timezone](/docs/en/operations/server-configuration-parameters/settings.md#timezone). Optional.

**Returned value(s)**

Returns a [DateTime64](../data-types/datetime64.md) value parsed from the input string according to a Joda-style format string.
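**Example**

A sketch of how the scale is derived: the number of `S` characters in the format string sets the scale of the returned DateTime64 (here, 3):

``` sql
SELECT parseDateTime64InJodaSyntax('2024-11-21 15:12:02.123', 'yyyy-MM-dd HH:mm:ss.SSS');
```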
## parseDateTime64InJodaSyntaxOrZero
@@ -136,7 +136,7 @@ ClickHouse applies the setting in those cases when…

- 0 — Disabled.
- 1 — Enabled.

Default value: 0.
Default value: 1.

## http_zlib_compression_level {#settings-http_zlib_compression_level}
@@ -97,7 +97,7 @@ ClickHouse selects the most relevant replica from the table's stale replicas.

- 0 — Disabled.
- 1 — Enabled.

Default value: 0.
Default value: 1.

## http_zlib_compression_level {#settings-http_zlib_compression_level}
@@ -22,6 +22,13 @@ namespace ErrorCodes
namespace
{

/** Due to a lack of proper code review, this code was contributed with a multiplication of template instantiations
  * over all pairs of data types, and we deeply regret that.
  *
  * We cannot remove all combinations, because the binary representation of serialized data has to remain the same,
  * but we can partially heal the wound by treating unsigned and signed data types in the same way.
  */

template <typename ValueType, typename TimestampType>
struct AggregationFunctionDeltaSumTimestampData
{
@@ -37,23 +44,22 @@ template <typename ValueType, typename TimestampType>
class AggregationFunctionDeltaSumTimestamp final
    : public IAggregateFunctionDataHelper<
          AggregationFunctionDeltaSumTimestampData<ValueType, TimestampType>,
          AggregationFunctionDeltaSumTimestamp<ValueType, TimestampType>
      >
          AggregationFunctionDeltaSumTimestamp<ValueType, TimestampType>>
{
public:
    AggregationFunctionDeltaSumTimestamp(const DataTypes & arguments, const Array & params)
        : IAggregateFunctionDataHelper<
              AggregationFunctionDeltaSumTimestampData<ValueType, TimestampType>,
              AggregationFunctionDeltaSumTimestamp<ValueType, TimestampType>
          >{arguments, params, createResultType()}
    {}
              AggregationFunctionDeltaSumTimestamp<ValueType, TimestampType>>{arguments, params, createResultType()}
    {
    }

    AggregationFunctionDeltaSumTimestamp()
        : IAggregateFunctionDataHelper<
              AggregationFunctionDeltaSumTimestampData<ValueType, TimestampType>,
              AggregationFunctionDeltaSumTimestamp<ValueType, TimestampType>
          >{}
    {}
              AggregationFunctionDeltaSumTimestamp<ValueType, TimestampType>>{}
    {
    }

    bool allocatesMemoryInArena() const override { return false; }
@@ -63,8 +69,8 @@ public:

    void NO_SANITIZE_UNDEFINED ALWAYS_INLINE add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const override
    {
        auto value = assert_cast<const ColumnVector<ValueType> &>(*columns[0]).getData()[row_num];
        auto ts = assert_cast<const ColumnVector<TimestampType> &>(*columns[1]).getData()[row_num];
        auto value = unalignedLoad<ValueType>(columns[0]->getRawData().data() + row_num * sizeof(ValueType));
        auto ts = unalignedLoad<TimestampType>(columns[1]->getRawData().data() + row_num * sizeof(TimestampType));

        auto & data = this->data(place);
@@ -172,10 +178,48 @@ public:

    void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
    {
        assert_cast<ColumnVector<ValueType> &>(to).getData().push_back(this->data(place).sum);
        static_cast<ColumnFixedSizeHelper &>(to).template insertRawData<sizeof(ValueType)>(
            reinterpret_cast<const char *>(&this->data(place).sum));
    }
};

template <typename FirstType, template <typename, typename> class AggregateFunctionTemplate, typename... TArgs>
IAggregateFunction * createWithTwoTypesSecond(const IDataType & second_type, TArgs && ... args)
{
    WhichDataType which(second_type);

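    /// The signed cases below intentionally map to the unsigned instantiations
    /// (Int32 -> UInt32, Int64 -> UInt64): the serialized binary representation is
    /// the same, which is what allows trimming the set of template instantiations
    /// (see the comment at the top of this anonymous namespace).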
    if (which.idx == TypeIndex::UInt32) return new AggregateFunctionTemplate<FirstType, UInt32>(args...);
    if (which.idx == TypeIndex::UInt64) return new AggregateFunctionTemplate<FirstType, UInt64>(args...);
    if (which.idx == TypeIndex::Int32) return new AggregateFunctionTemplate<FirstType, UInt32>(args...);
    if (which.idx == TypeIndex::Int64) return new AggregateFunctionTemplate<FirstType, UInt64>(args...);
    if (which.idx == TypeIndex::Float32) return new AggregateFunctionTemplate<FirstType, Float32>(args...);
    if (which.idx == TypeIndex::Float64) return new AggregateFunctionTemplate<FirstType, Float64>(args...);
    if (which.idx == TypeIndex::Date) return new AggregateFunctionTemplate<FirstType, UInt16>(args...);
    if (which.idx == TypeIndex::DateTime) return new AggregateFunctionTemplate<FirstType, UInt32>(args...);

    return nullptr;
}

template <template <typename, typename> class AggregateFunctionTemplate, typename... TArgs>
IAggregateFunction * createWithTwoTypes(const IDataType & first_type, const IDataType & second_type, TArgs && ... args)
{
    WhichDataType which(first_type);

    if (which.idx == TypeIndex::UInt8) return createWithTwoTypesSecond<UInt8, AggregateFunctionTemplate>(second_type, args...);
    if (which.idx == TypeIndex::UInt16) return createWithTwoTypesSecond<UInt16, AggregateFunctionTemplate>(second_type, args...);
    if (which.idx == TypeIndex::UInt32) return createWithTwoTypesSecond<UInt32, AggregateFunctionTemplate>(second_type, args...);
    if (which.idx == TypeIndex::UInt64) return createWithTwoTypesSecond<UInt64, AggregateFunctionTemplate>(second_type, args...);
    if (which.idx == TypeIndex::Int8) return createWithTwoTypesSecond<UInt8, AggregateFunctionTemplate>(second_type, args...);
    if (which.idx == TypeIndex::Int16) return createWithTwoTypesSecond<UInt16, AggregateFunctionTemplate>(second_type, args...);
    if (which.idx == TypeIndex::Int32) return createWithTwoTypesSecond<UInt32, AggregateFunctionTemplate>(second_type, args...);
    if (which.idx == TypeIndex::Int64) return createWithTwoTypesSecond<UInt64, AggregateFunctionTemplate>(second_type, args...);
    if (which.idx == TypeIndex::Float32) return createWithTwoTypesSecond<Float32, AggregateFunctionTemplate>(second_type, args...);
    if (which.idx == TypeIndex::Float64) return createWithTwoTypesSecond<Float64, AggregateFunctionTemplate>(second_type, args...);

    return nullptr;
}
AggregateFunctionPtr createAggregateFunctionDeltaSumTimestamp(
    const String & name,
    const DataTypes & arguments,
@@ -193,8 +237,14 @@ AggregateFunctionPtr createAggregateFunctionDeltaSumTimestamp(
        throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument for aggregate function {}, "
            "must be Int, Float, Date, DateTime", arguments[1]->getName(), name);

    return AggregateFunctionPtr(createWithTwoNumericOrDateTypes<AggregationFunctionDeltaSumTimestamp>(
    auto res = AggregateFunctionPtr(createWithTwoTypes<AggregationFunctionDeltaSumTimestamp>(
        *arguments[0], *arguments[1], arguments, params));

    if (!res)
        throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument for aggregate function {}, "
            "this type is not supported", arguments[0]->getName(), name);

    return res;
}
}
@@ -184,36 +184,8 @@ static IAggregateFunction * createWithDecimalType(const IDataType & argument_type
}

/** For template with two arguments.
  * This is extremely dangerous for code bloat - do not use.
  */
template <typename FirstType, template <typename, typename> class AggregateFunctionTemplate, typename... TArgs>
static IAggregateFunction * createWithTwoNumericTypesSecond(const IDataType & second_type, TArgs && ... args)
{
    WhichDataType which(second_type);
#define DISPATCH(TYPE) \
    if (which.idx == TypeIndex::TYPE) return new AggregateFunctionTemplate<FirstType, TYPE>(args...);
    FOR_NUMERIC_TYPES(DISPATCH)
#undef DISPATCH
    if (which.idx == TypeIndex::Enum8) return new AggregateFunctionTemplate<FirstType, Int8>(args...);
    if (which.idx == TypeIndex::Enum16) return new AggregateFunctionTemplate<FirstType, Int16>(args...);
    return nullptr;
}

template <template <typename, typename> class AggregateFunctionTemplate, typename... TArgs>
static IAggregateFunction * createWithTwoNumericTypes(const IDataType & first_type, const IDataType & second_type, TArgs && ... args)
{
    WhichDataType which(first_type);
#define DISPATCH(TYPE) \
    if (which.idx == TypeIndex::TYPE) \
        return createWithTwoNumericTypesSecond<TYPE, AggregateFunctionTemplate>(second_type, args...);
    FOR_NUMERIC_TYPES(DISPATCH)
#undef DISPATCH
    if (which.idx == TypeIndex::Enum8)
        return createWithTwoNumericTypesSecond<Int8, AggregateFunctionTemplate>(second_type, args...);
    if (which.idx == TypeIndex::Enum16)
        return createWithTwoNumericTypesSecond<Int16, AggregateFunctionTemplate>(second_type, args...);
    return nullptr;
}

template <typename FirstType, template <typename, typename> class AggregateFunctionTemplate, typename... TArgs>
static IAggregateFunction * createWithTwoBasicNumericTypesSecond(const IDataType & second_type, TArgs && ... args)
{
@@ -237,46 +209,6 @@ static IAggregateFunction * createWithTwoBasicNumericTypes(const IDataType & fir
    return nullptr;
}

template <typename FirstType, template <typename, typename> class AggregateFunctionTemplate, typename... TArgs>
static IAggregateFunction * createWithTwoNumericOrDateTypesSecond(const IDataType & second_type, TArgs && ... args)
{
    WhichDataType which(second_type);
#define DISPATCH(TYPE) \
    if (which.idx == TypeIndex::TYPE) return new AggregateFunctionTemplate<FirstType, TYPE>(args...);
    FOR_NUMERIC_TYPES(DISPATCH)
#undef DISPATCH
    if (which.idx == TypeIndex::Enum8) return new AggregateFunctionTemplate<FirstType, Int8>(args...);
    if (which.idx == TypeIndex::Enum16) return new AggregateFunctionTemplate<FirstType, Int16>(args...);

    /// expects that DataTypeDate is based on UInt16, DataTypeDateTime is based on UInt32
    if (which.idx == TypeIndex::Date) return new AggregateFunctionTemplate<FirstType, UInt16>(args...);
    if (which.idx == TypeIndex::DateTime) return new AggregateFunctionTemplate<FirstType, UInt32>(args...);

    return nullptr;
}

template <template <typename, typename> class AggregateFunctionTemplate, typename... TArgs>
static IAggregateFunction * createWithTwoNumericOrDateTypes(const IDataType & first_type, const IDataType & second_type, TArgs && ... args)
{
    WhichDataType which(first_type);
#define DISPATCH(TYPE) \
    if (which.idx == TypeIndex::TYPE) \
        return createWithTwoNumericOrDateTypesSecond<TYPE, AggregateFunctionTemplate>(second_type, args...);
    FOR_NUMERIC_TYPES(DISPATCH)
#undef DISPATCH
    if (which.idx == TypeIndex::Enum8)
        return createWithTwoNumericOrDateTypesSecond<Int8, AggregateFunctionTemplate>(second_type, args...);
    if (which.idx == TypeIndex::Enum16)
        return createWithTwoNumericOrDateTypesSecond<Int16, AggregateFunctionTemplate>(second_type, args...);

    /// expects that DataTypeDate is based on UInt16, DataTypeDateTime is based on UInt32
    if (which.idx == TypeIndex::Date)
        return createWithTwoNumericOrDateTypesSecond<UInt16, AggregateFunctionTemplate>(second_type, args...);
    if (which.idx == TypeIndex::DateTime)
        return createWithTwoNumericOrDateTypesSecond<UInt32, AggregateFunctionTemplate>(second_type, args...);
    return nullptr;
}

template <template <typename> class AggregateFunctionTemplate, typename... TArgs>
static IAggregateFunction * createWithStringType(const IDataType & argument_type, TArgs && ... args)
{
@@ -1794,7 +1794,7 @@ Possible values:

- 0 — Disabled.
- 1 — Enabled.
)", 0) \
)", 1) \
    DECLARE(Int64, http_zlib_compression_level, 3, R"(
Sets the level of data compression in the response to an HTTP request if [enable_http_compression = 1](#enable_http_compression).

@@ -4565,7 +4565,7 @@ Possible values:
- 0 - Disable
- 1 - Enable
)", 0) \
    DECLARE(Bool, query_plan_merge_filters, false, R"(
    DECLARE(Bool, query_plan_merge_filters, true, R"(
Allow to merge filters in the query plan
)", 0) \
    DECLARE(Bool, query_plan_filter_push_down, true, R"(
@@ -64,6 +64,7 @@ static std::initializer_list<std::pair<ClickHouseVersion, SettingsChangesHistory
    },
    {"24.11",
        {
            {"enable_http_compression", false, true, "Improvement for read-only clients since they can't change settings"},
            {"validate_mutation_query", false, true, "New setting to validate mutation queries by default."},
            {"enable_job_stack_trace", false, true, "Enable by default collecting stack traces from job's scheduling."},
            {"allow_suspicious_types_in_group_by", true, false, "Don't allow Variant/Dynamic types in GROUP BY by default"},
@@ -77,6 +78,7 @@ static std::initializer_list<std::pair<ClickHouseVersion, SettingsChangesHistory
            {"backup_restore_keeper_max_retries_while_initializing", 0, 20, "New setting."},
            {"backup_restore_keeper_max_retries_while_handling_error", 0, 20, "New setting."},
            {"backup_restore_finish_timeout_after_error_sec", 0, 180, "New setting."},
            {"query_plan_merge_filters", false, true, "Allow to merge filters in the query plan. This is required to properly support filter-push-down with a new analyzer."},
            {"parallel_replicas_local_plan", false, true, "Use local plan for local replica in a query with parallel replicas"},
            {"allow_experimental_bfloat16_type", false, false, "Add new experimental BFloat16 type"},
            {"filesystem_cache_skip_download_if_exceeds_per_query_cache_write_limit", 1, 1, "Rename of setting skip_download_if_exceeds_query_cache_limit"},
@@ -64,6 +64,7 @@ constexpr time_t MAX_DATETIME_DAY_NUM = 49710; // 2106-02-07
/// This factor transformation will say that the function is monotone everywhere.
struct ZeroTransform
{
    static constexpr auto name = "Zero";
    static UInt16 execute(Int64, const DateLUTImpl &) { return 0; }
    static UInt16 execute(UInt32, const DateLUTImpl &) { return 0; }
    static UInt16 execute(Int32, const DateLUTImpl &) { return 0; }
@@ -56,6 +56,21 @@ public:
                : is_not_monotonic;
        }

        if (checkAndGetDataType<DataTypeDateTime64>(&type))
        {
            const auto & left_date_time = left.safeGet<DateTime64>();
            TransformDateTime64<typename Transform::FactorTransform> transformer_left(left_date_time.getScale());

            const auto & right_date_time = right.safeGet<DateTime64>();
            TransformDateTime64<typename Transform::FactorTransform> transformer_right(right_date_time.getScale());

            return transformer_left.execute(left_date_time.getValue(), date_lut)
                    == transformer_right.execute(right_date_time.getValue(), date_lut)
                ? is_monotonic
                : is_not_monotonic;
        }

        return Transform::FactorTransform::execute(UInt32(left.safeGet<UInt64>()), date_lut)
                == Transform::FactorTransform::execute(UInt32(right.safeGet<UInt64>()), date_lut)
            ? is_monotonic
@@ -32,12 +32,12 @@ namespace Setting

namespace ErrorCodes
{
    extern const int ILLEGAL_COLUMN;
    extern const int NOT_IMPLEMENTED;
    extern const int BAD_ARGUMENTS;
    extern const int VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE;
    extern const int CANNOT_PARSE_DATETIME;
    extern const int ILLEGAL_COLUMN;
    extern const int NOT_ENOUGH_SPACE;
    extern const int NOT_IMPLEMENTED;
    extern const int VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE;
}

namespace
@@ -57,6 +57,12 @@ namespace
        Null
    };

    enum class ReturnType: uint8_t
    {
        DateTime,
        DateTime64
    };

    constexpr Int32 minYear = 1970;
    constexpr Int32 maxYear = 2106;
@@ -186,12 +192,13 @@ namespace
        Int32 minute = 0; /// range [0, 59]
        Int32 second = 0; /// range [0, 59]
        Int32 microsecond = 0; /// range [0, 999999]
        UInt32 scale = 0; /// scale of the result DateTime64. Always 6 for ParseSyntax == MySQL, [0, 6] for ParseSyntax == Joda.

        bool is_am = true; /// If is_hour_of_half_day = true and is_am = false (i.e. pm) then add 12 hours to the result DateTime
        bool hour_starts_at_1 = false; /// Whether the hour is clockhour
        bool is_hour_of_half_day = false; /// Whether the hour is of half day

        bool has_time_zone_offset = false; /// If true, time zone offset is explicitly specified.
        bool has_time_zone_offset = false; /// If true, timezone offset is explicitly specified.
        Int64 time_zone_offset = 0; /// Offset in seconds from the current timezone to UTC.

        void reset()
@@ -214,6 +221,7 @@ namespace
            minute = 0;
            second = 0;
            microsecond = 0;
            scale = 0;

            is_am = true;
            hour_starts_at_1 = false;
@@ -449,6 +457,18 @@ namespace
            return {};
        }

        [[nodiscard]]
        VoidOrError setScale(UInt8 scale_, ParseSyntax parse_syntax_)
        {
            if (parse_syntax_ == ParseSyntax::MySQL && scale_ != 6)
                RETURN_ERROR(ErrorCodes::CANNOT_PARSE_DATETIME, "Value {} for scale must be 6 for MySQL parse syntax", std::to_string(scale_))
            else if (parse_syntax_ == ParseSyntax::Joda && scale_ > 6)
                RETURN_ERROR(ErrorCodes::CANNOT_PARSE_DATETIME, "Value {} for scale must be in the range [0, 6] for Joda syntax", std::to_string(scale_))

            scale = scale_;
            return {};
        }

        /// For debug
        [[maybe_unused]] String toString() const
        {
@@ -571,7 +591,7 @@ namespace
    };

    /// _FUNC_(str[, format, timezone])
    template <typename Name, ParseSyntax parse_syntax, ErrorHandling error_handling, bool parseDateTime64 = false>
    template <typename Name, ParseSyntax parse_syntax, ReturnType return_type, ErrorHandling error_handling>
    class FunctionParseDateTimeImpl : public IFunction
    {
    public:
@@ -601,93 +621,100 @@ namespace
        FunctionArgumentDescriptors mandatory_args{
            {"time", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isString), nullptr, "String"}
        };

        FunctionArgumentDescriptors optional_args{
            {"format", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isString), nullptr, "String"},
            {"format", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isString), &isColumnConst, "const String"},
            {"timezone", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isString), &isColumnConst, "const String"}
        };

        validateFunctionArguments(*this, arguments, mandatory_args, optional_args);

        String time_zone_name = getTimeZone(arguments).getTimeZone();
        DataTypePtr date_type = nullptr;
        if constexpr (parseDateTime64)
        {
            String format = getFormat(arguments);
            std::vector<Instruction> instructions = parseFormat(format);
            UInt32 scale = 0;
            if (!instructions.empty())
            {
                for (const auto & ins : instructions)
                {
                    if (scale > 0)
                        break;
                    const String fragment = ins.getFragment();
                    for (char ch : fragment)
                    {
                        if (ch != 'S')
                        {
                            scale = 0;
                            break;
                        }
                        else
                            scale++;
                    }
                }
            }
            date_type = std::make_shared<DataTypeDateTime64>(scale, time_zone_name);
        }
        DataTypePtr data_type;
        if constexpr (return_type == ReturnType::DateTime)
            data_type = std::make_shared<DataTypeDateTime>(time_zone_name);
        else
            date_type = std::make_shared<DataTypeDateTime>(time_zone_name);
        {
            if constexpr (parse_syntax == ParseSyntax::MySQL)
                data_type = std::make_shared<DataTypeDateTime64>(6, time_zone_name);
            else
            {
                String format = getFormat(arguments);
                std::vector<Instruction> instructions = parseFormat(format);
                /// How many 'S' characters does the format string contain?
                UInt32 s_count = 0;
                for (const auto & instruction : instructions)
                {
                    const String fragment = instruction.getFragment();
                    for (char c : fragment)
                    {
                        if (c == 'S')
                            ++s_count;
                        else
                            break;
                    }
                    if (s_count > 0)
                        break;
                }
                /// Use s_count as DateTime64's scale.
                data_type = std::make_shared<DataTypeDateTime64>(s_count, time_zone_name);
            }
        }

        if (error_handling == ErrorHandling::Null)
            return std::make_shared<DataTypeNullable>(date_type);
        return date_type;
            return std::make_shared<DataTypeNullable>(data_type);
        return data_type;
    }
    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
    {
        ColumnUInt8::MutablePtr col_null_map;
        DataTypePtr result_type_without_nullable;
        if constexpr (error_handling == ErrorHandling::Null)
            col_null_map = ColumnUInt8::create(input_rows_count, 0);
        if constexpr (parseDateTime64)
            result_type_without_nullable = removeNullable(result_type); /// Remove Nullable wrapper. It will be added back later.
        else
            result_type_without_nullable = result_type;

        if constexpr (return_type == ReturnType::DateTime)
        {
            const DataTypeDateTime64 * datatime64_type = checkAndGetDataType<DataTypeDateTime64>(removeNullable(result_type).get());
            auto col_res = ColumnDateTime64::create(input_rows_count, datatime64_type->getScale());
            PaddedPODArray<DataTypeDateTime64::FieldType> & res_data = col_res->getData();
            executeImpl2<DataTypeDateTime64::FieldType>(arguments, result_type, input_rows_count, res_data, col_null_map);
            if constexpr (error_handling == ErrorHandling::Null)
                return ColumnNullable::create(std::move(col_res), std::move(col_null_map));
            else
                return col_res;
            MutableColumnPtr col_res = ColumnDateTime::create(input_rows_count);
            ColumnDateTime * col_datetime = assert_cast<ColumnDateTime *>(col_res.get());
            return executeImpl2<DataTypeDateTime::FieldType>(arguments, result_type, input_rows_count, col_res, col_datetime->getData());
        }
        else
        {
            auto col_res = ColumnDateTime::create(input_rows_count);
            PaddedPODArray<DataTypeDateTime::FieldType> & res_data = col_res->getData();
            executeImpl2<DataTypeDateTime::FieldType>(arguments, result_type, input_rows_count, res_data, col_null_map);
            if constexpr (error_handling == ErrorHandling::Null)
                return ColumnNullable::create(std::move(col_res), std::move(col_null_map));
            else
                return col_res;
            const auto * result_type_without_nullable_casted = checkAndGetDataType<DataTypeDateTime64>(result_type_without_nullable.get());
            MutableColumnPtr col_res = ColumnDateTime64::create(input_rows_count, result_type_without_nullable_casted->getScale());
            ColumnDateTime64 * col_datetime64 = assert_cast<ColumnDateTime64 *>(col_res.get());
            return executeImpl2<DataTypeDateTime64::FieldType>(arguments, result_type, input_rows_count, col_res, col_datetime64->getData());
        }
    }
    template<typename T>
    void executeImpl2(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count,
        PaddedPODArray<T> & res_data, ColumnUInt8::MutablePtr & col_null_map) const
    ColumnPtr executeImpl2(
        const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count,
        MutableColumnPtr & col_res, PaddedPODArray<T> & res_data) const
    {
        const auto * col_str = checkAndGetColumn<ColumnString>(arguments[0].column.get());
        if (!col_str)
            throw Exception(
                ErrorCodes::ILLEGAL_COLUMN,
                "Illegal column {} of first ('str') argument of function {}. Must be string.",
                arguments[0].column->getName(),
                "Illegal type in 1st ('time') argument of function {}. Must be String.",
                getName());

        String format = getFormat(arguments);
        const auto & time_zone = getTimeZone(arguments);
        std::vector<Instruction> instructions = parseFormat(format);
        Int64 multiplier = 0;
        UInt32 scale = 0;
        if constexpr (return_type == ReturnType::DateTime64)
        {
            const DataTypeDateTime64 * result_type_without_nullable_casted = checkAndGetDataType<DataTypeDateTime64>(removeNullable(result_type).get());
            scale = result_type_without_nullable_casted->getScale();
            multiplier = DecimalUtils::scaleMultiplier<DateTime64>(scale);
        }

        ColumnUInt8::MutablePtr col_null_map;
        if constexpr (error_handling == ErrorHandling::Null)
            col_null_map = ColumnUInt8::create(input_rows_count, 0);

        const String format = getFormat(arguments);
        const std::vector<Instruction> instructions = parseFormat(format);
        const auto & time_zone = getTimeZone(arguments);
        /// Make datetime fit in a cache line.
        alignas(64) DateTime<error_handling> datetime;
        for (size_t i = 0; i < input_rows_count; ++i)
@@ -698,6 +725,15 @@ namespace
            Pos end = str_ref.data + str_ref.size;
            bool error = false;

            if constexpr (return_type == ReturnType::DateTime64)
            {
                if (auto result = datetime.setScale(static_cast<UInt8>(scale), parse_syntax); !result.has_value())
                {
                    const ErrorCodeAndMessage & err = result.error();
                    throw Exception(err.error_code, "Invalid scale value: {}, {}", std::to_string(scale), err.error_message);
                }
            }

            for (const auto & instruction : instructions)
            {
                if (auto result = instruction.perform(cur, end, datetime); result.has_value())
@@ -732,9 +768,8 @@ namespace
                continue;

            Int64OrError result = 0;

            /// Ensure all input was consumed
            if (!parseDateTime64 && cur < end)
            if (cur < end)
            {
                result = tl::unexpected(ErrorCodeAndMessage(
                    ErrorCodes::CANNOT_PARSE_DATETIME,
@@ -747,14 +782,10 @@ namespace
            {
                if (result = datetime.buildDateTime(time_zone); result.has_value())
                {
                    if constexpr (parseDateTime64)
                    {
                        const DataTypeDateTime64 * datatime64_type = checkAndGetDataType<DataTypeDateTime64>(removeNullable(result_type).get());
                        Int64 multiplier = DecimalUtils::scaleMultiplier<DateTime64>(datatime64_type->getScale());
                        res_data[i] = static_cast<Int64>(*result) * multiplier + datetime.microsecond;
                    }
                    else
                    if constexpr (return_type == ReturnType::DateTime)
                        res_data[i] = static_cast<UInt32>(*result);
                    else
                        res_data[i] = static_cast<Int64>(*result) * multiplier + datetime.microsecond;
                }
            }
@@ -777,6 +808,10 @@ namespace
                }
            }
        }
        if constexpr (error_handling == ErrorHandling::Null)
            return ColumnNullable::create(std::move(col_res), std::move(col_null_map));
        else
            return std::move(col_res);
    }
@@ -808,7 +843,7 @@ namespace
        explicit Instruction(const String & literal_) : literal(literal_), fragment("LITERAL") { }
        explicit Instruction(String && literal_) : literal(std::move(literal_)), fragment("LITERAL") { }

        String getFragment() const { return fragment; }
        const String & getFragment() const { return fragment; }

        /// For debug
        [[maybe_unused]] String toString() const
@@ -885,6 +920,28 @@ namespace
            return cur;
        }

        template<typename T, NeedCheckSpace need_check_space>
        [[nodiscard]]
        static PosOrError readNumber6(Pos cur, Pos end, [[maybe_unused]] const String & fragment, T & res)
        {
            if constexpr (need_check_space == NeedCheckSpace::Yes)
                RETURN_ERROR_IF_FAILED(checkSpace(cur, end, 6, "readNumber6 requires size >= 6", fragment))

            res = (*cur - '0');
            ++cur;
            res = res * 10 + (*cur - '0');
            ++cur;
            res = res * 10 + (*cur - '0');
            ++cur;
            res = res * 10 + (*cur - '0');
            ++cur;
            res = res * 10 + (*cur - '0');
            ++cur;
            res = res * 10 + (*cur - '0');
            ++cur;
            return cur;
        }

        [[nodiscard]]
        static VoidOrError checkSpace(Pos cur, Pos end, size_t len, const String & msg, const String & fragment)
        {
@@ -1305,13 +1362,28 @@ namespace
        }

        [[nodiscard]]
        static PosOrError mysqlMicrosecond(Pos cur, Pos end, const String & fragment, DateTime<error_handling> & /*date*/)
        static PosOrError mysqlMicrosecond(Pos cur, Pos end, const String & fragment, DateTime<error_handling> & date)
        {
            RETURN_ERROR_IF_FAILED(checkSpace(cur, end, 6, "mysqlMicrosecond requires size >= 6", fragment))

            for (size_t i = 0; i < 6; ++i)
                ASSIGN_RESULT_OR_RETURN_ERROR(cur, (assertNumber<NeedCheckSpace::No>(cur, end, fragment)))
            if constexpr (return_type == ReturnType::DateTime)
            {
                RETURN_ERROR_IF_FAILED(checkSpace(cur, end, 6, "mysqlMicrosecond requires size >= 6", fragment))

                for (size_t i = 0; i < 6; ++i)
                    ASSIGN_RESULT_OR_RETURN_ERROR(cur, (assertNumber<NeedCheckSpace::No>(cur, end, fragment)))
            }
            else
            {
                if (date.scale != 6)
                    RETURN_ERROR(
                        ErrorCodes::CANNOT_PARSE_DATETIME,
                        "Unable to parse fragment {} from {} because the datetime scale {} is not 6",
                        fragment,
                        std::string_view(cur, end - cur),
                        std::to_string(date.scale))
                Int32 microsecond = 0;
                ASSIGN_RESULT_OR_RETURN_ERROR(cur, (readNumber6<Int32, NeedCheckSpace::Yes>(cur, end, fragment, microsecond)))
                RETURN_ERROR_IF_FAILED(date.setMicrosecond(microsecond))
            }
            return cur;
        }
@@ -1695,7 +1767,7 @@ namespace
        }

        [[nodiscard]]
        static PosOrError jodaMicroSecondOfSecond(size_t repetitions, Pos cur, Pos end, const String & fragment, DateTime<error_handling> & date)
        static PosOrError jodaMicrosecondOfSecond(size_t repetitions, Pos cur, Pos end, const String & fragment, DateTime<error_handling> & date)
        {
            Int32 microsecond;
            ASSIGN_RESULT_OR_RETURN_ERROR(cur, (readNumberWithVariableLength(cur, end, false, false, false, repetitions, std::max(repetitions, 2uz), fragment, microsecond)))
@@ -1704,31 +1776,32 @@ namespace
        }

        [[nodiscard]]
        static PosOrError jodaTimezoneId(size_t, Pos cur, Pos end, const String &, DateTime<error_handling> & date)
        static PosOrError jodaTimezone(size_t, Pos cur, Pos end, const String &, DateTime<error_handling> & date)
        {
            String dateTimeZone;
            String read_time_zone;
            while (cur <= end)
            {
                dateTimeZone += *cur;
                read_time_zone += *cur;
                ++cur;
            }
            const DateLUTImpl & date_time_zone = DateLUT::instance(dateTimeZone);
            const DateLUTImpl & date_time_zone = DateLUT::instance(read_time_zone);
            const auto result = date.buildDateTime(date_time_zone);
            if (result.has_value())
            {
                const auto timezoneOffset = date_time_zone.timezoneOffset(*result);
                const DateLUTImpl::Time timezone_offset = date_time_zone.timezoneOffset(*result);
                date.has_time_zone_offset = true;
                date.time_zone_offset = timezoneOffset;
                date.time_zone_offset = timezone_offset;
                return cur;
            }
            else
                RETURN_ERROR(ErrorCodes::CANNOT_PARSE_DATETIME, "Unable to build date time from timezone {}", dateTimeZone)
                RETURN_ERROR(ErrorCodes::CANNOT_PARSE_DATETIME, "Unable to parse date time from timezone {}", read_time_zone)
        }
        [[nodiscard]]
        static PosOrError jodaTimezoneOffset(size_t repetitions, Pos cur, Pos end, const String & fragment, DateTime<error_handling> & date)
        {
            RETURN_ERROR_IF_FAILED(checkSpace(cur, end, 5, "jodaTimezoneOffset requires size >= 5", fragment))

            Int32 sign;
            if (*cur == '-')
                sign = -1;
@@ -1737,7 +1810,7 @@ namespace
            else
                RETURN_ERROR(
                    ErrorCodes::CANNOT_PARSE_DATETIME,
                    "Unable to parse fragment {} from {} because of unknown sign time zone offset: {}",
                    "Unable to parse fragment {} from {} because of unknown sign in time zone offset: {}",
                    fragment,
                    std::string_view(cur, end - cur),
                    std::string_view(cur, 1))
@@ -1745,8 +1818,22 @@ namespace

            Int32 hour;
            ASSIGN_RESULT_OR_RETURN_ERROR(cur, (readNumberWithVariableLength(cur, end, false, false, false, repetitions, std::max(repetitions, 2uz), fragment, hour)))
            if (hour < 0 || hour > 23)
                RETURN_ERROR(
                    ErrorCodes::CANNOT_PARSE_DATETIME,
                    "Unable to parse fragment {} from {} because the hour of datetime is not in range [0, 23]: {}",
                    fragment,
                    std::string_view(cur, end - cur),
                    std::string_view(cur, 1))
            Int32 minute;
            ASSIGN_RESULT_OR_RETURN_ERROR(cur, (readNumberWithVariableLength(cur, end, false, false, false, repetitions, std::max(repetitions, 2uz), fragment, minute)))
            if (minute < 0 || minute > 59)
                RETURN_ERROR(
                    ErrorCodes::CANNOT_PARSE_DATETIME,
                    "Unable to parse fragment {} from {} because the minute of datetime is not in range [0, 59]: {}",
                    fragment,
                    std::string_view(cur, end - cur),
                    std::string_view(cur, 1))
            date.has_time_zone_offset = true;
            date.time_zone_offset = sign * (hour * 3600 + minute * 60);
            return cur;
@@ -2133,10 +2220,10 @@ namespace
                instructions.emplace_back(ACTION_ARGS_WITH_BIND(Instruction::jodaSecondOfMinute, repetitions));
                break;
            case 'S':
                instructions.emplace_back(ACTION_ARGS_WITH_BIND(Instruction::jodaMicroSecondOfSecond, repetitions));
                instructions.emplace_back(ACTION_ARGS_WITH_BIND(Instruction::jodaMicrosecondOfSecond, repetitions));
                break;
            case 'z':
                instructions.emplace_back(ACTION_ARGS_WITH_BIND(Instruction::jodaTimezoneId, repetitions));
                instructions.emplace_back(ACTION_ARGS_WITH_BIND(Instruction::jodaTimezone, repetitions));
                break;
            case 'Z':
                instructions.emplace_back(ACTION_ARGS_WITH_BIND(Instruction::jodaTimezoneOffset, repetitions));
@@ -2161,21 +2248,22 @@ namespace
        if (arguments.size() == 1)
        {
            if constexpr (parse_syntax == ParseSyntax::MySQL)
                return "%Y-%m-%d %H:%i:%s";
            {
                if constexpr (return_type == ReturnType::DateTime)
                    return "%Y-%m-%d %H:%i:%s";
                else
                    return "%Y-%m-%d %H:%i:%s.%f";
            }
            else
                return "yyyy-MM-dd HH:mm:ss";
        }
        else
        {
            if (!arguments[1].column || !isColumnConst(*arguments[1].column))
                throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Argument at index {} for function {} must be constant", 1, getName());

            const auto * col_format = checkAndGetColumnConst<ColumnString>(arguments[1].column.get());
            if (!col_format)
                throw Exception(
                    ErrorCodes::ILLEGAL_COLUMN,
                    "Illegal column {} of second ('format') argument of function {}. Must be constant string.",
                    arguments[1].column->getName(),
                    "Illegal type in 'format' argument of function {}. Must be constant String.",
                    getName());
            return col_format->getValue<String>();
        }
@@ -2190,8 +2278,7 @@ namespace
            if (!col)
                throw Exception(
                    ErrorCodes::ILLEGAL_COLUMN,
                    "Illegal column {} of third ('timezone') argument of function {}. Must be constant String.",
                    arguments[2].column->getName(),
                    "Illegal type in 'timezone' argument of function {}. Must be constant String.",
                    getName());

            String time_zone = col->getValue<String>();
|
||||
static constexpr auto name = "parseDateTimeInJodaSyntaxOrNull";
|
||||
};
|
||||
|
||||
struct NameParseDateTime64
|
||||
{
|
||||
static constexpr auto name = "parseDateTime64";
|
||||
};
|
||||
|
||||
struct NameParseDateTime64OrZero
|
||||
{
|
||||
static constexpr auto name = "parseDateTime64OrZero";
|
||||
};
|
||||
|
||||
struct NameParseDateTime64OrNull
|
||||
{
|
||||
static constexpr auto name = "parseDateTime64OrNull";
|
||||
};
|
||||
|
||||
struct NameParseDateTime64InJodaSyntax
|
||||
{
|
||||
static constexpr auto name = "parseDateTime64InJodaSyntax";
|
||||
@@ -2244,15 +2346,18 @@ namespace
        static constexpr auto name = "parseDateTime64InJodaSyntaxOrNull";
    };

    using FunctionParseDateTime = FunctionParseDateTimeImpl<NameParseDateTime, ParseSyntax::MySQL, ErrorHandling::Exception>;
    using FunctionParseDateTimeOrZero = FunctionParseDateTimeImpl<NameParseDateTimeOrZero, ParseSyntax::MySQL, ErrorHandling::Zero>;
    using FunctionParseDateTimeOrNull = FunctionParseDateTimeImpl<NameParseDateTimeOrNull, ParseSyntax::MySQL, ErrorHandling::Null>;
    using FunctionParseDateTimeInJodaSyntax = FunctionParseDateTimeImpl<NameParseDateTimeInJodaSyntax, ParseSyntax::Joda, ErrorHandling::Exception>;
    using FunctionParseDateTimeInJodaSyntaxOrZero = FunctionParseDateTimeImpl<NameParseDateTimeInJodaSyntaxOrZero, ParseSyntax::Joda, ErrorHandling::Zero>;
    using FunctionParseDateTimeInJodaSyntaxOrNull = FunctionParseDateTimeImpl<NameParseDateTimeInJodaSyntaxOrNull, ParseSyntax::Joda, ErrorHandling::Null>;
    using FunctionParseDateTime64InJodaSyntax = FunctionParseDateTimeImpl<NameParseDateTime64InJodaSyntax, ParseSyntax::Joda, ErrorHandling::Exception, true>;
    using FunctionParseDateTime64InJodaSyntaxOrZero = FunctionParseDateTimeImpl<NameParseDateTime64InJodaSyntaxOrZero, ParseSyntax::Joda, ErrorHandling::Zero, true>;
    using FunctionParseDateTime64InJodaSyntaxOrNull = FunctionParseDateTimeImpl<NameParseDateTime64InJodaSyntaxOrNull, ParseSyntax::Joda, ErrorHandling::Null, true>;
    using FunctionParseDateTime = FunctionParseDateTimeImpl<NameParseDateTime, ParseSyntax::MySQL, ReturnType::DateTime, ErrorHandling::Exception>;
    using FunctionParseDateTimeOrZero = FunctionParseDateTimeImpl<NameParseDateTimeOrZero, ParseSyntax::MySQL, ReturnType::DateTime, ErrorHandling::Zero>;
    using FunctionParseDateTimeOrNull = FunctionParseDateTimeImpl<NameParseDateTimeOrNull, ParseSyntax::MySQL, ReturnType::DateTime, ErrorHandling::Null>;
    using FunctionParseDateTime64 = FunctionParseDateTimeImpl<NameParseDateTime64, ParseSyntax::MySQL, ReturnType::DateTime64, ErrorHandling::Exception>;
    using FunctionParseDateTime64OrZero = FunctionParseDateTimeImpl<NameParseDateTime64OrZero, ParseSyntax::MySQL, ReturnType::DateTime64, ErrorHandling::Zero>;
    using FunctionParseDateTime64OrNull = FunctionParseDateTimeImpl<NameParseDateTime64OrNull, ParseSyntax::MySQL, ReturnType::DateTime64, ErrorHandling::Null>;
    using FunctionParseDateTimeInJodaSyntax = FunctionParseDateTimeImpl<NameParseDateTimeInJodaSyntax, ParseSyntax::Joda, ReturnType::DateTime, ErrorHandling::Exception>;
    using FunctionParseDateTimeInJodaSyntaxOrZero = FunctionParseDateTimeImpl<NameParseDateTimeInJodaSyntaxOrZero, ParseSyntax::Joda, ReturnType::DateTime, ErrorHandling::Zero>;
    using FunctionParseDateTimeInJodaSyntaxOrNull = FunctionParseDateTimeImpl<NameParseDateTimeInJodaSyntaxOrNull, ParseSyntax::Joda, ReturnType::DateTime, ErrorHandling::Null>;
    using FunctionParseDateTime64InJodaSyntax = FunctionParseDateTimeImpl<NameParseDateTime64InJodaSyntax, ParseSyntax::Joda, ReturnType::DateTime64, ErrorHandling::Exception>;
    using FunctionParseDateTime64InJodaSyntaxOrZero = FunctionParseDateTimeImpl<NameParseDateTime64InJodaSyntaxOrZero, ParseSyntax::Joda, ReturnType::DateTime64, ErrorHandling::Zero>;
    using FunctionParseDateTime64InJodaSyntaxOrNull = FunctionParseDateTimeImpl<NameParseDateTime64InJodaSyntaxOrNull, ParseSyntax::Joda, ReturnType::DateTime64, ErrorHandling::Null>;
}
REGISTER_FUNCTION(ParseDateTime)
@@ -2262,13 +2367,16 @@ REGISTER_FUNCTION(ParseDateTime)
    factory.registerFunction<FunctionParseDateTimeOrZero>();
    factory.registerFunction<FunctionParseDateTimeOrNull>();
    factory.registerAlias("str_to_date", FunctionParseDateTimeOrNull::name, FunctionFactory::Case::Insensitive);

    factory.registerFunction<FunctionParseDateTimeInJodaSyntax>();
    factory.registerFunction<FunctionParseDateTimeInJodaSyntaxOrZero>();
    factory.registerFunction<FunctionParseDateTimeInJodaSyntaxOrNull>();

    factory.registerFunction<FunctionParseDateTime64InJodaSyntax>();
    factory.registerFunction<FunctionParseDateTime64InJodaSyntaxOrZero>();
    factory.registerFunction<FunctionParseDateTime64InJodaSyntaxOrNull>();
    factory.registerFunction<FunctionParseDateTime64>();
    factory.registerFunction<FunctionParseDateTime64OrZero>();
    factory.registerFunction<FunctionParseDateTime64OrNull>();
}
@@ -3387,6 +3387,8 @@ UInt64 calculateCacheKey(const DB::ASTPtr & select_query)

    SipHash hash;
    hash.update(select.tables()->getTreeHash(/*ignore_aliases=*/true));
    if (const auto prewhere = select.prewhere())
        hash.update(prewhere->getTreeHash(/*ignore_aliases=*/true));
    if (const auto where = select.where())
        hash.update(where->getTreeHash(/*ignore_aliases=*/true));
    if (const auto group_by = select.groupBy())
@@ -11,10 +11,12 @@
#include <Interpreters/PreparedSets.h>
#include <Interpreters/TableJoin.h>
#include <Interpreters/createBlockSelector.h>
#include <Parsers/ASTSelectQuery.h>
#include <Parsers/DumpASTNode.h>
#include <Parsers/ExpressionListParsers.h>
#include <Parsers/IAST_fwd.h>
#include <Parsers/parseQuery.h>
#include <Storages/SelectQueryInfo.h>
#include <Common/CurrentThread.h>
#include <Common/Exception.h>
#include <Common/ProfileEvents.h>
@@ -341,16 +343,29 @@ Blocks ConcurrentHashJoin::dispatchBlock(const Strings & key_columns_names, cons
    return result;
}

UInt64 calculateCacheKey(std::shared_ptr<TableJoin> & table_join, const QueryTreeNodePtr & right_table_expression)
UInt64 calculateCacheKey(
    std::shared_ptr<TableJoin> & table_join, const QueryTreeNodePtr & right_table_expression, const SelectQueryInfo & select_query_info)
{
    const auto * select = select_query_info.query->as<DB::ASTSelectQuery>();
    if (!select)
        return 0;

    IQueryTreeNode::HashState hash;

    if (const auto prewhere = select->prewhere())
        hash.update(prewhere->getTreeHash(/*ignore_aliases=*/true));
    if (const auto where = select->where())
        hash.update(where->getTreeHash(/*ignore_aliases=*/true));

    chassert(right_table_expression);
    hash.update(right_table_expression->getTreeHash());

    chassert(table_join && table_join->oneDisjunct());
    const auto keys
        = NameOrderedSet{table_join->getClauses().at(0).key_names_right.begin(), table_join->getClauses().at(0).key_names_right.end()};
    for (const auto & name : keys)
        hash.update(name);

    return hash.get64();
}
}
@@ -4,8 +4,8 @@
#include <Analyzer/IQueryTreeNode.h>
#include <Interpreters/Context.h>
#include <Interpreters/ExpressionActions.h>
#include <Interpreters/HashTablesStatistics.h>
#include <Interpreters/HashJoin/HashJoin.h>
#include <Interpreters/HashTablesStatistics.h>
#include <Interpreters/IJoin.h>
#include <base/defines.h>
#include <base/types.h>
@@ -15,6 +15,8 @@
namespace DB
{

struct SelectQueryInfo;

/**
  * Can run addBlockToJoin() in parallel to speed up the join process. In tests, it achieves almost linear speedup with
  * the degree of parallelism.
@@ -80,5 +82,6 @@ private:
    Blocks dispatchBlock(const Strings & key_columns_names, const Block & from_block);
};

UInt64 calculateCacheKey(std::shared_ptr<TableJoin> & table_join, const QueryTreeNodePtr & right_table_expression);
UInt64 calculateCacheKey(
    std::shared_ptr<TableJoin> & table_join, const QueryTreeNodePtr & right_table_expression, const SelectQueryInfo & select_query_info);
}
@@ -45,6 +45,16 @@ void Expected::highlight(HighlightedRange range)
        return;

    auto it = highlights.lower_bound(range);

    /// Highlights are sorted by their starting position.
    /// lower_bound(range) will find the first highlight where begin >= range.begin.
    /// However, this does not ensure that the previous highlight's end <= range.begin.
    /// By checking the previous highlight, if it exists, we ensure that
    /// for each highlight x and the next one y: x.end <= y.begin, thus preventing any overlap.
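    /// Illustrative sketch (added for clarity, not in the original source): if existing
    /// highlights begin at positions 0 and 10, then for a new range beginning at 4,
    /// lower_bound returns the highlight starting at 10; stepping back one element also
    /// considers the highlight starting at 0, which may still overlap the new range.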
    if (it != highlights.begin())
        it = std::prev(it);

    while (it != highlights.end() && range.begin < it->end)
    {
        if (intersects(range.begin, range.end, it->begin, it->end))
@@ -2,8 +2,8 @@

#include <Core/Settings.h>

#include <Common/scope_guard_safe.h>
#include <Core/ParallelReplicasMode.h>
#include <Common/scope_guard_safe.h>

#include <Columns/ColumnAggregateFunction.h>
@@ -1267,11 +1267,13 @@ void joinCastPlanColumnsToNullable(QueryPlan & plan_to_add_cast, PlannerContextP
    plan_to_add_cast.addStep(std::move(cast_join_columns_step));
}

JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_expression,
JoinTreeQueryPlan buildQueryPlanForJoinNode(
    const QueryTreeNodePtr & join_table_expression,
    JoinTreeQueryPlan left_join_tree_query_plan,
    JoinTreeQueryPlan right_join_tree_query_plan,
    const ColumnIdentifierSet & outer_scope_columns,
    PlannerContextPtr & planner_context)
    PlannerContextPtr & planner_context,
    const SelectQueryInfo & select_query_info)
{
    auto & join_node = join_table_expression->as<JoinNode &>();
    if (left_join_tree_query_plan.from_stage != QueryProcessingStage::FetchColumns)
@@ -1554,7 +1556,8 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_
    }

    const Block & right_header = right_plan.getCurrentHeader();
    auto join_algorithm = chooseJoinAlgorithm(table_join, join_node.getRightTableExpression(), left_header, right_header, planner_context);
    auto join_algorithm = chooseJoinAlgorithm(
        table_join, join_node.getRightTableExpression(), left_header, right_header, planner_context, select_query_info);

    auto result_plan = QueryPlan();
@@ -1912,11 +1915,13 @@ JoinTreeQueryPlan buildJoinTreeQueryPlan(const QueryTreeNodePtr & query_node,
            auto left_query_plan = std::move(query_plans_stack.back());
            query_plans_stack.pop_back();

            query_plans_stack.push_back(buildQueryPlanForJoinNode(table_expression,
            query_plans_stack.push_back(buildQueryPlanForJoinNode(
                table_expression,
                std::move(left_query_plan),
                std::move(right_query_plan),
                table_expressions_outer_scope_columns[i],
                planner_context));
                planner_context,
                select_query_info));
        }
        else
        {
@@ -789,12 +789,14 @@ std::shared_ptr<DirectKeyValueJoin> tryDirectJoin(const std::shared_ptr<TableJoi
    }
}

static std::shared_ptr<IJoin> tryCreateJoin(JoinAlgorithm algorithm,
static std::shared_ptr<IJoin> tryCreateJoin(
    JoinAlgorithm algorithm,
    std::shared_ptr<TableJoin> & table_join,
    const QueryTreeNodePtr & right_table_expression,
    const Block & left_table_expression_header,
    const Block & right_table_expression_header,
    const PlannerContextPtr & planner_context)
    const PlannerContextPtr & planner_context,
    const SelectQueryInfo & select_query_info)
{
    if (table_join->kind() == JoinKind::Paste)
        return std::make_shared<PasteJoin>(table_join, right_table_expression_header);
@@ -824,7 +826,7 @@ static std::shared_ptr<IJoin> tryCreateJoin(JoinAlgorithm algorithm,
    {
        const auto & settings = query_context->getSettingsRef();
        StatsCollectingParams params{
            calculateCacheKey(table_join, right_table_expression),
            calculateCacheKey(table_join, right_table_expression, select_query_info),
            settings[Setting::collect_hash_table_stats_during_joins],
            query_context->getServerSettings()[ServerSetting::max_entries_for_hash_table_stats],
            settings[Setting::max_size_to_preallocate_for_joins]};
@@ -866,11 +868,13 @@ static std::shared_ptr<IJoin> tryCreateJoin(JoinAlgorithm algorithm,
    return nullptr;
}

std::shared_ptr<IJoin> chooseJoinAlgorithm(std::shared_ptr<TableJoin> & table_join,
std::shared_ptr<IJoin> chooseJoinAlgorithm(
    std::shared_ptr<TableJoin> & table_join,
    const QueryTreeNodePtr & right_table_expression,
    const Block & left_table_expression_header,
    const Block & right_table_expression_header,
    const PlannerContextPtr & planner_context)
    const PlannerContextPtr & planner_context,
    const SelectQueryInfo & select_query_info)
{
    if (table_join->getMixedJoinExpression()
        && !table_join->isEnabledAlgorithm(JoinAlgorithm::HASH)
@@ -926,7 +930,14 @@ std::shared_ptr<IJoin> chooseJoinAlgorithm(std::shared_ptr<TableJoin> & table_jo

    for (auto algorithm : table_join->getEnabledJoinAlgorithms())
    {
        auto join = tryCreateJoin(algorithm, table_join, right_table_expression, left_table_expression_header, right_table_expression_header, planner_context);
        auto join = tryCreateJoin(
            algorithm,
            table_join,
            right_table_expression,
            left_table_expression_header,
            right_table_expression_header,
            planner_context,
            select_query_info);
        if (join)
            return join;
    }
@@ -12,6 +12,8 @@
namespace DB
{

struct SelectQueryInfo;

/** Join clause represents a single JOIN ON section clause.
  * A join clause consists of JOIN keys and conditions.
  *
@@ -218,10 +220,11 @@ std::optional<bool> tryExtractConstantFromJoinNode(const QueryTreeNodePtr & join
  * Table join structure can be modified during JOIN algorithm choosing for special JOIN algorithms.
  * For example JOIN with Dictionary engine, or JOIN with JOIN engine.
  */
std::shared_ptr<IJoin> chooseJoinAlgorithm(std::shared_ptr<TableJoin> & table_join,
std::shared_ptr<IJoin> chooseJoinAlgorithm(
    std::shared_ptr<TableJoin> & table_join,
    const QueryTreeNodePtr & right_table_expression,
    const Block & left_table_expression_header,
    const Block & right_table_expression_header,
    const PlannerContextPtr & planner_context);

    const PlannerContextPtr & planner_context,
    const SelectQueryInfo & select_query_info);
}
@@ -5,6 +5,7 @@
#include <Interpreters/ExpressionActions.h>
#include <IO/Operators.h>
#include <Common/JSONBuilder.h>
#include <DataTypes/DataTypeFactory.h>
#include <DataTypes/DataTypeLowCardinality.h>
#include <DataTypes/DataTypesNumber.h>
#include <Functions/IFunction.h>
@@ -52,7 +53,7 @@ static ActionsAndName splitSingleAndFilter(ActionsDAG & dag, const ActionsDAG::N
    auto filter_type = removeLowCardinality(split_filter_node->result_type);
    if (!filter_type->onlyNull() && !isUInt8(removeNullable(filter_type)))
    {
        DataTypePtr cast_type = std::make_shared<DataTypeUInt8>();
        DataTypePtr cast_type = DataTypeFactory::instance().get("Bool");
        if (filter_type->isNullable())
            cast_type = std::make_shared<DataTypeNullable>(std::move(cast_type));
@ -647,7 +647,7 @@ std::optional<String> optimizeUseAggregateProjections(QueryPlan::Node & node, Qu

range.begin = exact_ranges[i].end;
ordinary_reading_marks -= exact_ranges[i].end - exact_ranges[i].begin;
exact_count += part_with_ranges.data_part->index_granularity.getRowsCountInRange(exact_ranges[i]);
exact_count += part_with_ranges.data_part->index_granularity->getRowsCountInRange(exact_ranges[i]);
++i;
}

@ -201,7 +201,7 @@ public:

size_t getMarkRows(size_t part_idx, size_t mark) const
{
return parts[part_idx].data_part->index_granularity.getMarkRows(mark);
return parts[part_idx].data_part->index_granularity->getMarkRows(mark);
}
private:
const RangesInDataParts & parts;
@ -444,7 +444,7 @@ SplitPartsRangesResult splitPartsRanges(RangesInDataParts ranges_in_data_parts,
parts_ranges.push_back(
{index_access.getValue(part_index, range.begin), range, part_index, PartsRangesIterator::EventType::RangeStart});

const bool value_is_defined_at_end_mark = range.end < index_granularity.getMarksCount();
const bool value_is_defined_at_end_mark = range.end < index_granularity->getMarksCount();
if (!value_is_defined_at_end_mark)
continue;

@ -667,7 +667,7 @@ std::pair<std::vector<RangesInDataParts>, std::vector<Values>> splitIntersecting
PartRangeIndex parts_range_start_index(parts_range_start);
parts_ranges_queue.push({std::move(parts_range_start), std::move(parts_range_start_index)});

const bool value_is_defined_at_end_mark = range.end < index_granularity.getMarksCount();
const bool value_is_defined_at_end_mark = range.end < index_granularity->getMarksCount();
if (!value_is_defined_at_end_mark)
continue;

@ -667,7 +667,7 @@ Pipe ReadFromMergeTree::readInOrder(
part_with_ranges.ranges.size(),
read_type == ReadType::InReverseOrder ? " reverse " : " ",
part_with_ranges.data_part->name, total_rows,
part_with_ranges.data_part->index_granularity.getMarkStartingRow(part_with_ranges.ranges.front().begin));
part_with_ranges.data_part->index_granularity->getMarkStartingRow(part_with_ranges.ranges.front().begin));

MergeTreeSelectAlgorithmPtr algorithm;
if (read_type == ReadType::InReverseOrder)
@ -1759,7 +1759,7 @@ ReadFromMergeTree::AnalysisResultPtr ReadFromMergeTree::selectRangesToRead(
return std::make_shared<AnalysisResult>(std::move(result));

for (const auto & part : parts)
total_marks_pk += part->index_granularity.getMarksCountWithoutFinal();
total_marks_pk += part->index_granularity->getMarksCountWithoutFinal();
parts_before_pk = parts.size();

auto reader_settings = getMergeTreeReaderSettings(context_, query_info_);

@ -29,6 +29,7 @@
#include <Storages/MergeTree/checkDataPart.h>
#include <Storages/MergeTree/Backup.h>
#include <Storages/StorageReplicatedMergeTree.h>
#include <Storages/MergeTree/MergeTreeIndexGranularityAdaptive.h>
#include <base/JSON.h>
#include <boost/algorithm/string/join.hpp>
#include <Common/CurrentMetrics.h>
@ -626,11 +627,12 @@ UInt64 IMergeTreeDataPart::getIndexSizeInAllocatedBytes() const

UInt64 IMergeTreeDataPart::getIndexGranularityBytes() const
{
return index_granularity.getBytesSize();
return index_granularity->getBytesSize();
}

UInt64 IMergeTreeDataPart::getIndexGranularityAllocatedBytes() const
{
return index_granularity.getBytesAllocated();
return index_granularity->getBytesAllocated();
}

void IMergeTreeDataPart::assertState(const std::initializer_list<MergeTreeDataPartState> & affordable_states) const
@ -661,7 +663,7 @@ void IMergeTreeDataPart::assertOnDisk() const

UInt64 IMergeTreeDataPart::getMarksCount() const
{
return index_granularity.getMarksCount();
return index_granularity->getMarksCount();
}

UInt64 IMergeTreeDataPart::getExistingBytesOnDisk() const
@ -746,7 +748,6 @@ void IMergeTreeDataPart::loadColumnsChecksumsIndexes(bool require_columns_checks
loadChecksums(require_columns_checksums);

loadIndexGranularity();
index_granularity.shrinkToFitInMemory();

if (!(*storage.getSettings())[MergeTreeSetting::primary_key_lazy_load])
getIndex();
@ -942,13 +943,13 @@ void IMergeTreeDataPart::loadIndex() const
for (size_t i = 0; i < key_size; ++i)
{
loaded_index[i] = primary_key.data_types[i]->createColumn();
loaded_index[i]->reserve(index_granularity.getMarksCount());
loaded_index[i]->reserve(index_granularity->getMarksCount());
}

String index_name = "primary" + getIndexExtensionFromFilesystem(getDataPartStorage());
String index_path = fs::path(getDataPartStorage().getRelativePath()) / index_name;
auto index_file = metadata_manager->read(index_name);
size_t marks_count = index_granularity.getMarksCount();
size_t marks_count = index_granularity->getMarksCount();

Serializations key_serializations(key_size);
for (size_t j = 0; j < key_size; ++j)
@ -988,6 +989,8 @@ void IMergeTreeDataPart::loadIndex() const
"{}, read: {})", index_path, marks_count, loaded_index[i]->size());
}

LOG_TEST(storage.log, "Loaded primary key index for part {}, {} columns are kept in memory", name, key_size);

if (!index_file->eof())
throw Exception(ErrorCodes::EXPECTED_END_OF_FILE, "Index file {} is unexpectedly long", index_path);

@ -1361,7 +1364,7 @@ void IMergeTreeDataPart::loadRowsCount()
assertEOF(*buf);
};

if (index_granularity.empty())
if (index_granularity->empty())
{
rows_count = 0;
}
@ -1396,9 +1399,9 @@ void IMergeTreeDataPart::loadRowsCount()
backQuote(column.name), rows_in_column, name, rows_count);
}

size_t last_possibly_incomplete_mark_rows = index_granularity.getLastNonFinalMarkRows();
size_t last_possibly_incomplete_mark_rows = index_granularity->getLastNonFinalMarkRows();
/// All this rows have to be written in column
size_t index_granularity_without_last_mark = index_granularity.getTotalRows() - last_possibly_incomplete_mark_rows;
size_t index_granularity_without_last_mark = index_granularity->getTotalRows() - last_possibly_incomplete_mark_rows;
/// We have more rows in column than in index granularity without last possibly incomplete mark
if (rows_in_column < index_granularity_without_last_mark)
{
@ -1408,7 +1411,7 @@ void IMergeTreeDataPart::loadRowsCount()
"and size of single value, "
"but index granularity in part {} without last mark has {} rows, which "
"is more than in column",
backQuote(column.name), rows_in_column, name, index_granularity.getTotalRows());
backQuote(column.name), rows_in_column, name, index_granularity->getTotalRows());
}

/// In last mark we actually written less or equal rows than stored in last mark of index granularity
@ -1456,8 +1459,8 @@ void IMergeTreeDataPart::loadRowsCount()
column.name, column_size, sizeof_field);
}

size_t last_mark_index_granularity = index_granularity.getLastNonFinalMarkRows();
size_t rows_approx = index_granularity.getTotalRows();
size_t last_mark_index_granularity = index_granularity->getLastNonFinalMarkRows();
size_t rows_approx = index_granularity->getTotalRows();
if (!(rows_count <= rows_approx && rows_approx < rows_count + last_mark_index_granularity))
throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected size of column {}: "
"{} rows, expected {}+-{} rows according to the index",
@ -1520,7 +1523,7 @@ UInt64 IMergeTreeDataPart::readExistingRowsCount()

while (current_row < rows_count)
{
size_t rows_to_read = index_granularity.getMarkRows(current_mark);
size_t rows_to_read = index_granularity->getMarkRows(current_mark);
continue_reading = (current_mark != 0);

Columns result;
@ -1968,6 +1971,9 @@ void IMergeTreeDataPart::initializeIndexGranularityInfo()
index_granularity_info = MergeTreeIndexGranularityInfo(storage, *mrk_type);
else
index_granularity_info = MergeTreeIndexGranularityInfo(storage, part_type);

/// It may be converted to constant index granularity after loading it.
index_granularity = std::make_unique<MergeTreeIndexGranularityAdaptive>();
}

void IMergeTreeDataPart::remove()
@ -2241,9 +2247,9 @@ void IMergeTreeDataPart::checkConsistency(bool require_part_metadata) const
"part_state: [{}]",
columns.toString(),
index_granularity_info.getMarkSizeInBytes(columns.size()),
index_granularity.getMarksCount(),
index_granularity->getMarksCount(),
index_granularity_info.describe(),
index_granularity.describe(),
index_granularity->describe(),
part_state);

e.addMessage(debug_info);

@ -321,7 +321,7 @@ public:

/// Amount of rows between marks
/// As index always loaded into memory
MergeTreeIndexGranularity index_granularity;
MergeTreeIndexGranularityPtr index_granularity;

/// Index that for each part stores min and max values of a set of columns. This allows quickly excluding
/// parts based on conditions on these columns imposed by a query.

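The member change above (a `MergeTreeIndexGranularity` value becomes a `MergeTreeIndexGranularityPtr`) is what enables the rest of this diff: the pointer can refer to different representations of the same information. A much-simplified sketch of the interface the pointer plausibly exposes; the method names follow the calls in the diff, while the class names and bodies here are illustrative guesses:

``` cpp
#include <cstddef>
#include <cstdio>
#include <memory>
#include <numeric>
#include <vector>

/// Illustrative interface; method names follow the diff, bodies are guesses.
struct IIndexGranularitySketch
{
    virtual ~IIndexGranularitySketch() = default;
    virtual size_t getMarksCount() const = 0;
    virtual size_t getMarkRows(size_t mark) const = 0;
    virtual size_t getTotalRows() const = 0;
};

/// Constant form: the whole granularity is two numbers.
struct ConstantSketch : IIndexGranularitySketch
{
    size_t granularity = 0;
    size_t marks = 0;
    size_t getMarksCount() const override { return marks; }
    size_t getMarkRows(size_t) const override { return granularity; }
    size_t getTotalRows() const override { return granularity * marks; }
};

/// Adaptive form: one entry per mark, like the old inline MergeTreeIndexGranularity.
struct AdaptiveSketch : IIndexGranularitySketch
{
    std::vector<size_t> rows_per_mark;
    size_t getMarksCount() const override { return rows_per_mark.size(); }
    size_t getMarkRows(size_t mark) const override { return rows_per_mark[mark]; }
    size_t getTotalRows() const override
    {
        return std::accumulate(rows_per_mark.begin(), rows_per_mark.end(), size_t{0});
    }

    /// If every mark has the same size, the vector collapses to a constant,
    /// which is the likely intent of the optimize() calls in this diff.
    std::unique_ptr<IIndexGranularitySketch> optimize() const
    {
        for (size_t rows : rows_per_mark)
            if (rows != rows_per_mark.front())
                return nullptr; /// stays adaptive
        auto constant = std::make_unique<ConstantSketch>();
        constant->granularity = rows_per_mark.empty() ? 0 : rows_per_mark.front();
        constant->marks = rows_per_mark.size();
        return constant;
    }
};

int main()
{
    AdaptiveSketch adaptive;
    adaptive.rows_per_mark = {8192, 8192, 8192};
    if (auto constant = adaptive.optimize())
        std::printf("collapsed to %zu marks x %zu rows\n", constant->getMarksCount(), constant->getMarkRows(0));
}
```

The payoff is memory: a part whose granules are all the same size needs two integers instead of one vector entry per mark, which is what the `optimize()` calls later in this diff appear to exploit.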
@ -1,5 +1,6 @@
#include <Storages/MergeTree/IMergeTreeDataPartWriter.h>
#include <Common/MemoryTrackerBlockerInThread.h>
#include <Storages/MergeTree/MergeTreeIndexGranularity.h>
#include <Columns/ColumnSparse.h>

namespace DB
@ -11,7 +12,6 @@ namespace ErrorCodes
extern const int NO_SUCH_COLUMN_IN_TABLE;
}


Block getIndexBlockAndPermute(const Block & block, const Names & names, const IColumn::Permutation * permutation)
{
Block result;
@ -57,7 +57,7 @@ IMergeTreeDataPartWriter::IMergeTreeDataPartWriter(
const StorageMetadataPtr & metadata_snapshot_,
const VirtualsDescriptionPtr & virtual_columns_,
const MergeTreeWriterSettings & settings_,
const MergeTreeIndexGranularity & index_granularity_)
MergeTreeIndexGranularityPtr index_granularity_)
: data_part_name(data_part_name_)
, serializations(serializations_)
, index_granularity_info(index_granularity_info_)
@ -68,7 +68,7 @@ IMergeTreeDataPartWriter::IMergeTreeDataPartWriter(
, settings(settings_)
, with_final_mark(settings.can_use_adaptive_granularity)
, data_part_storage(data_part_storage_)
, index_granularity(index_granularity_)
, index_granularity(std::move(index_granularity_))
{
}

@ -145,7 +145,7 @@ MergeTreeDataPartWriterPtr createMergeTreeDataPartCompactWriter(
const String & marks_file_extension_,
const CompressionCodecPtr & default_codec_,
const MergeTreeWriterSettings & writer_settings,
const MergeTreeIndexGranularity & computed_index_granularity);
MergeTreeIndexGranularityPtr computed_index_granularity);

MergeTreeDataPartWriterPtr createMergeTreeDataPartWideWriter(
const String & data_part_name_,
@ -162,8 +162,7 @@ MergeTreeDataPartWriterPtr createMergeTreeDataPartWideWriter(
const String & marks_file_extension_,
const CompressionCodecPtr & default_codec_,
const MergeTreeWriterSettings & writer_settings,
const MergeTreeIndexGranularity & computed_index_granularity);

MergeTreeIndexGranularityPtr computed_index_granularity);

MergeTreeDataPartWriterPtr createMergeTreeDataPartWriter(
MergeTreeDataPartType part_type,
@ -182,12 +181,26 @@ MergeTreeDataPartWriterPtr createMergeTreeDataPartWriter(
const String & marks_file_extension_,
const CompressionCodecPtr & default_codec_,
const MergeTreeWriterSettings & writer_settings,
const MergeTreeIndexGranularity & computed_index_granularity)
MergeTreeIndexGranularityPtr computed_index_granularity)
{
if (part_type == MergeTreeDataPartType::Compact)
return createMergeTreeDataPartCompactWriter(data_part_name_, logger_name_, serializations_, data_part_storage_,
index_granularity_info_, storage_settings_, columns_list, column_positions, metadata_snapshot, virtual_columns, indices_to_recalc, stats_to_recalc_,
marks_file_extension_, default_codec_, writer_settings, computed_index_granularity);
return createMergeTreeDataPartCompactWriter(
data_part_name_,
logger_name_,
serializations_,
data_part_storage_,
index_granularity_info_,
storage_settings_,
columns_list,
column_positions,
metadata_snapshot,
virtual_columns,
indices_to_recalc,
stats_to_recalc_,
marks_file_extension_,
default_codec_,
writer_settings,
std::move(computed_index_granularity));
if (part_type == MergeTreeDataPartType::Wide)
return createMergeTreeDataPartWideWriter(
data_part_name_,
@ -204,7 +217,7 @@ MergeTreeDataPartWriterPtr createMergeTreeDataPartWriter(
marks_file_extension_,
default_codec_,
writer_settings,
computed_index_granularity);
std::move(computed_index_granularity));
throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown part type: {}", part_type.toString());
}

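Note the calling convention in the rewritten factories: `computed_index_granularity` is taken by value and forwarded with `std::move`, so a single refcounted granularity object travels down the writer chain without extra atomic increments. A small self-contained sketch of that ownership style (all names here are illustrative):

``` cpp
#include <memory>
#include <utility>

struct Granularity { };
using GranularityPtr = std::shared_ptr<Granularity>;

struct Writer
{
    GranularityPtr granularity;
    explicit Writer(GranularityPtr g) : granularity(std::move(g)) {}
};

/// By-value parameter: the caller decides whether to copy or give up ownership;
/// std::move on the way down avoids refcount churn at every level.
Writer makeWriter(GranularityPtr computed)
{
    return Writer(std::move(computed));
}

int main()
{
    auto g = std::make_shared<Granularity>();
    Writer w = makeWriter(std::move(g)); /// g is now empty; w owns the object
    (void) w;
}
```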
@ -36,7 +36,7 @@ public:
const StorageMetadataPtr & metadata_snapshot_,
const VirtualsDescriptionPtr & virtual_columns_,
const MergeTreeWriterSettings & settings_,
const MergeTreeIndexGranularity & index_granularity_ = {});
MergeTreeIndexGranularityPtr index_granularity_);

virtual ~IMergeTreeDataPartWriter();

@ -53,7 +53,7 @@ public:

PlainMarksByName releaseCachedMarks();

const MergeTreeIndexGranularity & getIndexGranularity() const { return index_granularity; }
MergeTreeIndexGranularityPtr getIndexGranularity() const { return index_granularity; }

virtual Block getColumnsSample() const = 0;

@ -77,7 +77,7 @@ protected:

MutableDataPartStoragePtr data_part_storage;
MutableColumns index_columns;
MergeTreeIndexGranularity index_granularity;
MergeTreeIndexGranularityPtr index_granularity;
/// Marks that will be saved to cache on finish.
PlainMarksByName cached_marks;
};
@ -102,6 +102,6 @@ MergeTreeDataPartWriterPtr createMergeTreeDataPartWriter(
const String & marks_file_extension,
const CompressionCodecPtr & default_codec_,
const MergeTreeWriterSettings & writer_settings,
const MergeTreeIndexGranularity & computed_index_granularity);
MergeTreeIndexGranularityPtr computed_index_granularity);

}

@ -30,7 +30,7 @@ public:
virtual void write(const Block & block) = 0;
virtual void cancel() noexcept = 0;

const MergeTreeIndexGranularity & getIndexGranularity() const
MergeTreeIndexGranularityPtr getIndexGranularity() const
{
return writer->getIndexGranularity();
}

@ -51,7 +51,7 @@ public:

const MergeTreeIndexGranularityInfo & getIndexGranularityInfo() const override { return data_part->index_granularity_info; }

const MergeTreeIndexGranularity & getIndexGranularity() const override { return data_part->index_granularity; }
const MergeTreeIndexGranularity & getIndexGranularity() const override { return *data_part->index_granularity; }

const SerializationInfoByName & getSerializationInfos() const override { return data_part->getSerializationInfos(); }

@ -52,7 +52,7 @@ MergeListElement::MergeListElement(const StorageID & table_id_, FutureMergedMuta
total_size_bytes_compressed += source_part->getBytesOnDisk();
total_size_bytes_uncompressed += source_part->getTotalColumnsSize().data_uncompressed;
total_size_marks += source_part->getMarksCount();
total_rows_count += source_part->index_granularity.getTotalRows();
total_rows_count += source_part->index_granularity->getTotalRows();
}

if (!future_part->parts.empty())

@ -9,6 +9,7 @@
#include <Common/logger_useful.h>
#include <Core/Settings.h>
#include <Common/ProfileEvents.h>
#include <Storages/MergeTree/MergeTreeIndexGranularity.h>
#include <Compression/CompressedWriteBuffer.h>
#include <DataTypes/ObjectUtils.h>
#include <DataTypes/Serializations/SerializationInfo.h>
@ -73,6 +74,7 @@ namespace CurrentMetrics

namespace DB
{

namespace Setting
{
extern const SettingsBool compile_sort_description;
@ -100,6 +102,7 @@ namespace MergeTreeSetting
extern const MergeTreeSettingsUInt64 vertical_merge_algorithm_min_rows_to_activate;
extern const MergeTreeSettingsBool vertical_merge_remote_filesystem_prefetch;
extern const MergeTreeSettingsBool prewarm_mark_cache;
extern const MergeTreeSettingsBool use_const_adaptive_granularity;
}

namespace ErrorCodes
@ -413,10 +416,11 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::prepare() const
};

auto mutations_snapshot = global_ctx->data->getMutationsSnapshot(params);
auto storage_settings = global_ctx->data->getSettings();

SerializationInfo::Settings info_settings =
{
.ratio_of_defaults_for_sparse = (*global_ctx->data->getSettings())[MergeTreeSetting::ratio_of_defaults_for_sparse_serialization],
.ratio_of_defaults_for_sparse = (*storage_settings)[MergeTreeSetting::ratio_of_defaults_for_sparse_serialization],
.choose_kind = true,
};

@ -465,6 +469,7 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::prepare() const

ctx->sum_input_rows_upper_bound = global_ctx->merge_list_element_ptr->total_rows_count;
ctx->sum_compressed_bytes_upper_bound = global_ctx->merge_list_element_ptr->total_size_bytes_compressed;
ctx->sum_uncompressed_bytes_upper_bound = global_ctx->merge_list_element_ptr->total_size_bytes_uncompressed;

global_ctx->chosen_merge_algorithm = chooseMergeAlgorithm();
global_ctx->merge_list_element_ptr->merge_algorithm.store(global_ctx->chosen_merge_algorithm, std::memory_order_relaxed);
@ -508,8 +513,14 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::prepare() const
throw Exception(ErrorCodes::LOGICAL_ERROR, "Merge algorithm must be chosen");
}

/// If merge is vertical we cannot calculate it
ctx->blocks_are_granules_size = (global_ctx->chosen_merge_algorithm == MergeAlgorithm::Vertical);
bool use_adaptive_granularity = global_ctx->new_data_part->index_granularity_info.mark_type.adaptive;
bool use_const_adaptive_granularity = (*storage_settings)[MergeTreeSetting::use_const_adaptive_granularity];

/// If merge is vertical we cannot calculate it.
/// If granularity is constant we don't need to calculate it.
ctx->blocks_are_granules_size = use_adaptive_granularity
&& !use_const_adaptive_granularity
&& global_ctx->chosen_merge_algorithm == MergeAlgorithm::Vertical;

/// Merged stream will be created and available as merged_stream variable
createMergedStream();
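The rewritten `blocks_are_granules_size` assignment now depends on three inputs rather than only on the merge algorithm. Restating it as a pure function makes the truth table easy to check; this is a sketch for illustration, not an API from the codebase:

``` cpp
#include <cstdio>

/// Sketch only: restates the condition from the hunk above as a pure function.
static bool blocksAreGranulesSize(bool adaptive, bool const_adaptive, bool vertical_merge)
{
    /// Granule sizes are derived from written blocks only when the part uses
    /// adaptive (and not constant) granularity and the merge is vertical.
    return adaptive && !const_adaptive && vertical_merge;
}

int main()
{
    std::printf("%d\n", blocksAreGranulesSize(true, false, true));  /// 1
    std::printf("%d\n", blocksAreGranulesSize(true, true, true));   /// 0: constant granularity needs no calculation
    std::printf("%d\n", blocksAreGranulesSize(false, false, true)); /// 0: non-adaptive marks
}
```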
@ -551,7 +562,14 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::prepare() const
}
}

bool save_marks_in_cache = (*global_ctx->data->getSettings())[MergeTreeSetting::prewarm_mark_cache] && global_ctx->context->getMarkCache();
auto index_granularity_ptr = createMergeTreeIndexGranularity(
ctx->sum_input_rows_upper_bound,
ctx->sum_uncompressed_bytes_upper_bound,
*storage_settings,
global_ctx->new_data_part->index_granularity_info,
ctx->blocks_are_granules_size);

bool save_marks_in_cache = (*storage_settings)[MergeTreeSetting::prewarm_mark_cache] && global_ctx->context->getMarkCache();

global_ctx->to = std::make_shared<MergedBlockOutputStream>(
global_ctx->new_data_part,
@ -560,6 +578,7 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::prepare() const
MergeTreeIndexFactory::instance().getMany(global_ctx->merging_skip_indexes),
getStatisticsForColumns(global_ctx->merging_columns, global_ctx->metadata_snapshot),
ctx->compression_codec,
std::move(index_granularity_ptr),
global_ctx->txn ? global_ctx->txn->tid : Tx::PrehistoricTID,
/*reset_columns=*/ true,
save_marks_in_cache,
@ -1107,12 +1126,12 @@ void MergeTask::VerticalMergeStage::prepareVerticalMergeForOneColumn() const
global_ctx->new_data_part,
global_ctx->metadata_snapshot,
columns_list,
ctx->compression_codec,
column_pipepline.indexes_to_recalc,
getStatisticsForColumns(columns_list, global_ctx->metadata_snapshot),
ctx->compression_codec,
global_ctx->to->getIndexGranularity(),
&global_ctx->written_offset_columns,
save_marks_in_cache,
global_ctx->to->getIndexGranularity());
save_marks_in_cache);

ctx->column_elems_written = 0;
}

@ -247,7 +247,6 @@ private:
bool need_remove_expired_values{false};
bool force_ttl{false};
CompressionCodecPtr compression_codec{nullptr};
size_t sum_input_rows_upper_bound{0};
std::shared_ptr<RowsSourcesTemporaryFile> rows_sources_temporary_file;
std::optional<ColumnSizeEstimator> column_sizes{};

@ -265,7 +264,9 @@ private:
std::function<bool()> is_cancelled{};

/// Local variables for this stage
size_t sum_input_rows_upper_bound{0};
size_t sum_compressed_bytes_upper_bound{0};
size_t sum_uncompressed_bytes_upper_bound{0};
bool blocks_are_granules_size{false};

LoggerPtr log{getLogger("MergeTask::PrepareStage")};

@ -83,6 +83,7 @@
#include <Storages/StorageMergeTree.h>
#include <Storages/StorageReplicatedMergeTree.h>
#include <Storages/VirtualColumnUtils.h>
#include <Storages/MergeTree/MergeTreeIndexGranularityAdaptive.h>

#include <boost/range/algorithm_ext/erase.hpp>
#include <boost/algorithm/string/join.hpp>
@ -7237,7 +7238,7 @@ Block MergeTreeData::getMinMaxCountProjectionBlock(
/// It's extremely rare that some parts have final marks while others don't. To make it
/// straightforward, disable minmax_count projection when `max(pk)' encounters any part with
/// no final mark.
if (need_primary_key_max_column && !part->index_granularity.hasFinalMark())
if (need_primary_key_max_column && !part->index_granularity->hasFinalMark())
return {};

real_parts.push_back(part);
@ -8960,10 +8961,15 @@ std::pair<MergeTreeData::MutableDataPartPtr, scope_guard> MergeTreeData::createE
auto compression_codec = getContext()->chooseCompressionCodec(0, 0);

const auto & index_factory = MergeTreeIndexFactory::instance();
MergedBlockOutputStream out(new_data_part, metadata_snapshot, columns,
MergedBlockOutputStream out(
new_data_part,
metadata_snapshot,
columns,
index_factory.getMany(metadata_snapshot->getSecondaryIndices()),
ColumnsStatistics{},
compression_codec, txn ? txn->tid : Tx::PrehistoricTID);
compression_codec,
std::make_shared<MergeTreeIndexGranularityAdaptive>(),
txn ? txn->tid : Tx::PrehistoricTID);

bool sync_on_insert = (*settings)[MergeTreeSetting::fsync_after_insert];

@ -3,6 +3,7 @@
#include <Storages/MergeTree/MergeTreeReaderCompactSingleBuffer.h>
#include <Storages/MergeTree/MergeTreeDataPartWriterCompact.h>
#include <Storages/MergeTree/LoadedMergeTreeDataPartInfoForReader.h>
#include <Storages/MergeTree/MergeTreeSettings.h>


namespace DB
@ -15,6 +16,11 @@ namespace ErrorCodes
extern const int BAD_SIZE_OF_FILE_IN_DATA_PART;
}

namespace MergeTreeSetting
{
extern MergeTreeSettingsBool enable_index_granularity_compression;
}

MergeTreeDataPartCompact::MergeTreeDataPartCompact(
const MergeTreeData & storage_,
const String & name_,
@ -62,7 +68,7 @@ MergeTreeDataPartWriterPtr createMergeTreeDataPartCompactWriter(
const String & marks_file_extension_,
const CompressionCodecPtr & default_codec_,
const MergeTreeWriterSettings & writer_settings,
const MergeTreeIndexGranularity & computed_index_granularity)
MergeTreeIndexGranularityPtr computed_index_granularity)
{
NamesAndTypesList ordered_columns_list;
std::copy_if(columns_list.begin(), columns_list.end(), std::back_inserter(ordered_columns_list),
@ -76,7 +82,7 @@ MergeTreeDataPartWriterPtr createMergeTreeDataPartCompactWriter(
data_part_name_, logger_name_, serializations_, data_part_storage_,
index_granularity_info_, storage_settings_, ordered_columns_list, metadata_snapshot, virtual_columns,
indices_to_recalc, stats_to_recalc_, marks_file_extension_,
default_codec_, writer_settings, computed_index_granularity);
default_codec_, writer_settings, std::move(computed_index_granularity));
}


@ -95,8 +101,11 @@ void MergeTreeDataPartCompact::calculateEachColumnSizes(ColumnSizeByName & /*eac
}

void MergeTreeDataPartCompact::loadIndexGranularityImpl(
MergeTreeIndexGranularity & index_granularity_, const MergeTreeIndexGranularityInfo & index_granularity_info_,
size_t columns_count, const IDataPartStorage & data_part_storage_)
MergeTreeIndexGranularityPtr & index_granularity_ptr,
const MergeTreeIndexGranularityInfo & index_granularity_info_,
size_t columns_count,
const IDataPartStorage & data_part_storage_,
const MergeTreeSettings & storage_settings)
{
if (!index_granularity_info_.mark_type.adaptive)
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "MergeTreeDataPartCompact cannot be created with non-adaptive granularity.");
@ -122,10 +131,14 @@ void MergeTreeDataPartCompact::loadIndexGranularityImpl(
marks_reader->ignore(columns_count * sizeof(MarkInCompressedFile));
size_t granularity;
readBinaryLittleEndian(granularity, *marks_reader);
index_granularity_.appendMark(granularity);
index_granularity_ptr->appendMark(granularity);
}

index_granularity_.setInitialized();
if (storage_settings[MergeTreeSetting::enable_index_granularity_compression])
{
if (auto new_granularity_ptr = index_granularity_ptr->optimize())
index_granularity_ptr = std::move(new_granularity_ptr);
}
}

void MergeTreeDataPartCompact::loadIndexGranularity()
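The loading loop above implies the record layout of a compact part's marks file: one `MarkInCompressedFile` per column, followed by the granule's row count, which is the only field the loop keeps. A worked size calculation under that assumed layout:

``` cpp
#include <cstddef>
#include <cstdint>
#include <cstdio>

/// Assumed layout of one adaptive compact-part mark record, as implied by the
/// reader above: an offset pair per column, then the granule's row count.
struct MarkInCompressedFileSketch
{
    uint64_t offset_in_compressed_file;
    uint64_t offset_in_decompressed_block;
};

int main()
{
    const size_t columns_count = 10;
    const size_t record_bytes = columns_count * sizeof(MarkInCompressedFileSketch) + sizeof(uint64_t);
    std::printf("one mark record for %zu columns = %zu bytes\n", columns_count, record_bytes); /// 168
}
```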
@ -133,7 +146,7 @@ void MergeTreeDataPartCompact::loadIndexGranularity()
if (columns.empty())
throw Exception(ErrorCodes::NO_FILE_IN_DATA_PART, "No columns in part {}", name);

loadIndexGranularityImpl(index_granularity, index_granularity_info, columns.size(), getDataPartStorage());
loadIndexGranularityImpl(index_granularity, index_granularity_info, columns.size(), getDataPartStorage(), *storage.getSettings());
}

void MergeTreeDataPartCompact::loadMarksToCache(const Names & column_names, MarkCache * mark_cache) const
@ -152,7 +165,7 @@ void MergeTreeDataPartCompact::loadMarksToCache(const Names & column_names, Mark
info_for_read,
mark_cache,
index_granularity_info.getMarksFilePath(DATA_FILE_NAME),
index_granularity.getMarksCount(),
index_granularity->getMarksCount(),
index_granularity_info,
/*save_marks_in_cache=*/ true,
read_settings,
@ -227,7 +240,7 @@ void MergeTreeDataPartCompact::doCheckConsistency(bool require_part_metadata) co
getDataPartStorage().getRelativePath(),
std::string(fs::path(getDataPartStorage().getFullPath()) / mrk_file_name));

UInt64 expected_file_size = index_granularity_info.getMarkSizeInBytes(columns.size()) * index_granularity.getMarksCount();
UInt64 expected_file_size = index_granularity_info.getMarkSizeInBytes(columns.size()) * index_granularity->getMarksCount();
if (expected_file_size != file_size)
throw Exception(
ErrorCodes::BAD_SIZE_OF_FILE_IN_DATA_PART,

@ -60,8 +60,11 @@ public:

protected:
static void loadIndexGranularityImpl(
MergeTreeIndexGranularity & index_granularity_, const MergeTreeIndexGranularityInfo & index_granularity_info_,
size_t columns_count, const IDataPartStorage & data_part_storage_);
MergeTreeIndexGranularityPtr & index_granularity_,
const MergeTreeIndexGranularityInfo & index_granularity_info_,
size_t columns_count,
const IDataPartStorage & data_part_storage_,
const MergeTreeSettings & storage_settings);

void doCheckConsistency(bool require_part_metadata) const override;


@ -3,6 +3,8 @@
#include <Storages/MergeTree/MergeTreeDataPartWriterWide.h>
#include <Storages/MergeTree/IMergeTreeDataPartWriter.h>
#include <Storages/MergeTree/LoadedMergeTreeDataPartInfoForReader.h>
#include <Storages/MergeTree/MergeTreeIndexGranularityConstant.h>
#include <Storages/MergeTree/MergeTreeSettings.h>
#include <DataTypes/NestedUtils.h>
#include <Core/NamesAndTypes.h>

@ -17,6 +19,11 @@ namespace ErrorCodes
extern const int LOGICAL_ERROR;
}

namespace MergeTreeSetting
{
extern MergeTreeSettingsBool enable_index_granularity_compression;
}

MergeTreeDataPartWide::MergeTreeDataPartWide(
const MergeTreeData & storage_,
const String & name_,
@ -68,14 +75,14 @@ MergeTreeDataPartWriterPtr createMergeTreeDataPartWideWriter(
const String & marks_file_extension_,
const CompressionCodecPtr & default_codec_,
const MergeTreeWriterSettings & writer_settings,
const MergeTreeIndexGranularity & computed_index_granularity)
MergeTreeIndexGranularityPtr computed_index_granularity)
{
return std::make_unique<MergeTreeDataPartWriterWide>(
data_part_name_, logger_name_, serializations_, data_part_storage_,
index_granularity_info_, storage_settings_, columns_list,
metadata_snapshot, virtual_columns, indices_to_recalc, stats_to_recalc_,
marks_file_extension_,
default_codec_, writer_settings, computed_index_granularity);
default_codec_, writer_settings, std::move(computed_index_granularity));
}


@ -114,8 +121,11 @@ ColumnSize MergeTreeDataPartWide::getColumnSizeImpl(
}

void MergeTreeDataPartWide::loadIndexGranularityImpl(
MergeTreeIndexGranularity & index_granularity_, MergeTreeIndexGranularityInfo & index_granularity_info_,
const IDataPartStorage & data_part_storage_, const std::string & any_column_file_name)
MergeTreeIndexGranularityPtr & index_granularity_ptr,
MergeTreeIndexGranularityInfo & index_granularity_info_,
const IDataPartStorage & data_part_storage_,
const std::string & any_column_file_name,
const MergeTreeSettings & storage_settings)
{
index_granularity_info_.changeGranularityIfRequired(data_part_storage_);

@ -127,12 +137,13 @@ void MergeTreeDataPartWide::loadIndexGranularityImpl(
std::string(fs::path(data_part_storage_.getFullPath()) / marks_file_path));

size_t marks_file_size = data_part_storage_.getFileSize(marks_file_path);
size_t fixed_granularity = index_granularity_info_.fixed_index_granularity;

if (!index_granularity_info_.mark_type.adaptive && !index_granularity_info_.mark_type.compressed)
{
/// The most easy way - no need to read the file, everything is known from its size.
size_t marks_count = marks_file_size / index_granularity_info_.getMarkSizeInBytes();
index_granularity_.resizeWithFixedGranularity(marks_count, index_granularity_info_.fixed_index_granularity); /// all the same
index_granularity_ptr = std::make_shared<MergeTreeIndexGranularityConstant>(fixed_granularity, fixed_granularity, marks_count, false);
}
else
{
@ -145,6 +156,7 @@ void MergeTreeDataPartWide::loadIndexGranularityImpl(
marks_reader = std::make_unique<CompressedReadBufferFromFile>(std::move(marks_file));

size_t marks_count = 0;

while (!marks_reader->eof())
{
MarkInCompressedFile mark;
@ -157,15 +169,20 @@ void MergeTreeDataPartWide::loadIndexGranularityImpl(
if (index_granularity_info_.mark_type.adaptive)
{
readBinaryLittleEndian(granularity, *marks_reader);
index_granularity_.appendMark(granularity);
index_granularity_ptr->appendMark(granularity);
}
}

if (!index_granularity_info_.mark_type.adaptive)
index_granularity_.resizeWithFixedGranularity(marks_count, index_granularity_info_.fixed_index_granularity); /// all the same
{
index_granularity_ptr = std::make_shared<MergeTreeIndexGranularityConstant>(fixed_granularity, fixed_granularity, marks_count, false);
}
else if (storage_settings[MergeTreeSetting::enable_index_granularity_compression])
{
if (auto new_granularity_ptr = index_granularity_ptr->optimize())
index_granularity_ptr = std::move(new_granularity_ptr);
}
}

index_granularity_.setInitialized();
}

void MergeTreeDataPartWide::loadIndexGranularity()
@ -179,7 +196,7 @@ void MergeTreeDataPartWide::loadIndexGranularity()
"There are no files for column {} in part {}",
columns.front().name, getDataPartStorage().getFullPath());

loadIndexGranularityImpl(index_granularity, index_granularity_info, getDataPartStorage(), *any_column_filename);
loadIndexGranularityImpl(index_granularity, index_granularity_info, getDataPartStorage(), *any_column_filename, *storage.getSettings());
}

void MergeTreeDataPartWide::loadMarksToCache(const Names & column_names, MarkCache * mark_cache) const
@ -209,7 +226,7 @@ void MergeTreeDataPartWide::loadMarksToCache(const Names & column_names, MarkCac
info_for_read,
mark_cache,
index_granularity_info.getMarksFilePath(*stream_name),
index_granularity.getMarksCount(),
index_granularity->getMarksCount(),
index_granularity_info,
/*save_marks_in_cache=*/ true,
read_settings,
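In the non-adaptive branch above, the mark count is derived purely from the marks file size, and the result is now materialized as a `MergeTreeIndexGranularityConstant` instead of a vector filled with identical values. A worked example of that arithmetic; the 16-byte mark size is an assumption standing in for `getMarkSizeInBytes()`:

``` cpp
#include <cstddef>
#include <cstdio>

int main()
{
    /// Assumption: a plain non-adaptive mark is one compressed/decompressed
    /// offset pair; the real size comes from getMarkSizeInBytes().
    const size_t mark_size_in_bytes = 2 * sizeof(size_t); /// 16
    const size_t marks_file_size = 1600;
    const size_t marks_count = marks_file_size / mark_size_in_bytes; /// 100 marks
    const size_t fixed_granularity = 8192;
    std::printf("%zu marks, at most %zu rows\n", marks_count, marks_count * fixed_granularity);
}
```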
@ -55,8 +55,11 @@ public:

protected:
static void loadIndexGranularityImpl(
MergeTreeIndexGranularity & index_granularity_, MergeTreeIndexGranularityInfo & index_granularity_info_,
const IDataPartStorage & data_part_storage_, const std::string & any_column_file_name);
MergeTreeIndexGranularityPtr & index_granularity_ptr,
MergeTreeIndexGranularityInfo & index_granularity_info_,
const IDataPartStorage & data_part_storage_,
const std::string & any_column_file_name,
const MergeTreeSettings & storage_settings);

void doCheckConsistency(bool require_part_metadata) const override;


@ -25,13 +25,13 @@ MergeTreeDataPartWriterCompact::MergeTreeDataPartWriterCompact(
const String & marks_file_extension_,
const CompressionCodecPtr & default_codec_,
const MergeTreeWriterSettings & settings_,
const MergeTreeIndexGranularity & index_granularity_)
MergeTreeIndexGranularityPtr index_granularity_)
: MergeTreeDataPartWriterOnDisk(
data_part_name_, logger_name_, serializations_,
data_part_storage_, index_granularity_info_, storage_settings_,
columns_list_, metadata_snapshot_, virtual_columns_,
indices_to_recalc_, stats_to_recalc, marks_file_extension_,
default_codec_, settings_, index_granularity_)
default_codec_, settings_, std::move(index_granularity_))
, plain_file(getDataPartStorage().writeFile(
MergeTreeDataPartCompact::DATA_FILE_NAME_WITH_EXTENSION,
settings.max_compress_block_size,
@ -189,13 +189,13 @@ void MergeTreeDataPartWriterCompact::write(const Block & block, const IColumn::P
header = result_block.cloneEmpty();

columns_buffer.add(result_block.mutateColumns());
size_t current_mark_rows = index_granularity.getMarkRows(getCurrentMark());
size_t current_mark_rows = index_granularity->getMarkRows(getCurrentMark());
size_t rows_in_buffer = columns_buffer.size();

if (rows_in_buffer >= current_mark_rows)
{
Block flushed_block = header.cloneWithColumns(columns_buffer.releaseColumns());
auto granules_to_write = getGranulesToWrite(index_granularity, flushed_block.rows(), getCurrentMark(), /* last_block = */ false);
auto granules_to_write = getGranulesToWrite(*index_granularity, flushed_block.rows(), getCurrentMark(), /* last_block = */ false);
writeDataBlockPrimaryIndexAndSkipIndices(flushed_block, granules_to_write);
setCurrentMark(getCurrentMark() + granules_to_write.size());
calculateAndSerializeStatistics(flushed_block);
@ -274,12 +274,11 @@ void MergeTreeDataPartWriterCompact::fillDataChecksums(MergeTreeDataPartChecksum
if (columns_buffer.size() != 0)
{
auto block = header.cloneWithColumns(columns_buffer.releaseColumns());
auto granules_to_write = getGranulesToWrite(index_granularity, block.rows(), getCurrentMark(), /* last_block = */ true);
auto granules_to_write = getGranulesToWrite(*index_granularity, block.rows(), getCurrentMark(), /*last_block=*/ true);
if (!granules_to_write.back().is_complete)
{
/// Correct last mark as it should contain exact amount of rows.
index_granularity.popMark();
index_granularity.appendMark(granules_to_write.back().rows_to_write);
index_granularity->adjustLastMark(granules_to_write.back().rows_to_write);
}
writeDataBlockPrimaryIndexAndSkipIndices(block, granules_to_write);
}
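`adjustLastMark(n)` replaces the previous `popMark()` plus `appendMark(n)` pair in both writers. Semantically it rewrites the final, possibly incomplete mark in place; a minimal sketch:

``` cpp
#include <cassert>
#include <cstddef>
#include <vector>

/// Minimal sketch: the last mark is rewritten in place to the exact number of
/// rows actually written, instead of being popped and re-appended.
struct GranularitySketch
{
    std::vector<size_t> marks;
    void adjustLastMark(size_t rows) { marks.back() = rows; }
};

int main()
{
    GranularitySketch g{{8192, 8192}};
    g.adjustLastMark(100); /// final, incomplete granule holds only 100 rows
    assert(g.marks.back() == 100);
}
```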
@ -375,11 +374,11 @@ static void fillIndexGranularityImpl(
void MergeTreeDataPartWriterCompact::fillIndexGranularity(size_t index_granularity_for_block, size_t rows_in_block)
{
size_t index_offset = 0;
if (index_granularity.getMarksCount() > getCurrentMark())
index_offset = index_granularity.getMarkRows(getCurrentMark()) - columns_buffer.size();
if (index_granularity->getMarksCount() > getCurrentMark())
index_offset = index_granularity->getMarkRows(getCurrentMark()) - columns_buffer.size();

fillIndexGranularityImpl(
index_granularity,
*index_granularity,
index_offset,
index_granularity_for_block,
rows_in_block);

@ -27,7 +27,7 @@ public:
const String & marks_file_extension,
const CompressionCodecPtr & default_codec,
const MergeTreeWriterSettings & settings,
const MergeTreeIndexGranularity & index_granularity);
MergeTreeIndexGranularityPtr index_granularity_);

void write(const Block & block, const IColumn::Permutation * permutation) override;


@ -178,20 +178,20 @@ MergeTreeDataPartWriterOnDisk::MergeTreeDataPartWriterOnDisk(
const String & marks_file_extension_,
const CompressionCodecPtr & default_codec_,
const MergeTreeWriterSettings & settings_,
const MergeTreeIndexGranularity & index_granularity_)
MergeTreeIndexGranularityPtr index_granularity_)
: IMergeTreeDataPartWriter(
data_part_name_, serializations_, data_part_storage_, index_granularity_info_,
storage_settings_, columns_list_, metadata_snapshot_, virtual_columns_, settings_, index_granularity_)
storage_settings_, columns_list_, metadata_snapshot_, virtual_columns_, settings_, std::move(index_granularity_))
, skip_indices(indices_to_recalc_)
, stats(stats_to_recalc_)
, marks_file_extension(marks_file_extension_)
, default_codec(default_codec_)
, compute_granularity(index_granularity.empty())
, compute_granularity(index_granularity->empty())
, compress_primary_key(settings.compress_primary_key)
, execution_stats(skip_indices.size(), stats.size())
, log(getLogger(logger_name_ + " (DataPartWriter)"))
{
if (settings.blocks_are_granules_size && !index_granularity.empty())
if (settings.blocks_are_granules_size && !index_granularity->empty())
throw Exception(ErrorCodes::LOGICAL_ERROR,
"Can't take information about index granularity from blocks, when non empty index_granularity array specified");

@ -226,63 +226,15 @@ void MergeTreeDataPartWriterOnDisk::cancel() noexcept
store.second->cancel();
}

// Implementation is split into static functions for ability
/// of making unit tests without creation instance of IMergeTreeDataPartWriter,
/// which requires a lot of dependencies and access to filesystem.
static size_t computeIndexGranularityImpl(
const Block & block,
size_t index_granularity_bytes,
size_t fixed_index_granularity_rows,
bool blocks_are_granules,
bool can_use_adaptive_index_granularity)
{
size_t rows_in_block = block.rows();
size_t index_granularity_for_block;

if (!can_use_adaptive_index_granularity)
{
index_granularity_for_block = fixed_index_granularity_rows;
}
else
{
size_t block_size_in_memory = block.bytes();
if (blocks_are_granules)
{
index_granularity_for_block = rows_in_block;
}
else if (block_size_in_memory >= index_granularity_bytes)
{
size_t granules_in_block = block_size_in_memory / index_granularity_bytes;
index_granularity_for_block = rows_in_block / granules_in_block;
}
else
{
size_t size_of_row_in_bytes = std::max(block_size_in_memory / rows_in_block, 1UL);
index_granularity_for_block = index_granularity_bytes / size_of_row_in_bytes;
}
}

/// We should be less or equal than fixed index granularity.
/// But if block size is a granule size then do not adjust it.
/// Granularity greater than fixed granularity might come from compact part.
if (!blocks_are_granules)
index_granularity_for_block = std::min(fixed_index_granularity_rows, index_granularity_for_block);

/// Very rare case when index granularity bytes less than single row.
if (index_granularity_for_block == 0)
index_granularity_for_block = 1;

return index_granularity_for_block;
}

size_t MergeTreeDataPartWriterOnDisk::computeIndexGranularity(const Block & block) const
{
return computeIndexGranularityImpl(
block,
(*storage_settings)[MergeTreeSetting::index_granularity_bytes],
(*storage_settings)[MergeTreeSetting::index_granularity],
settings.blocks_are_granules_size,
settings.can_use_adaptive_granularity);
return DB::computeIndexGranularity(
block.rows(),
block.bytes(),
(*storage_settings)[MergeTreeSetting::index_granularity_bytes],
(*storage_settings)[MergeTreeSetting::index_granularity],
settings.blocks_are_granules_size,
settings.can_use_adaptive_granularity);
}

void MergeTreeDataPartWriterOnDisk::initPrimaryIndex()
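The removed `computeIndexGranularityImpl` above documents the formula that the new free function `DB::computeIndexGranularity` is expected to preserve: pick enough rows per granule that one granule occupies roughly `index_granularity_bytes` in memory, capped by the fixed row granularity. A worked example reproducing that logic from the deleted code:

``` cpp
#include <algorithm>
#include <cstddef>
#include <cstdio>

/// Reproduction of the deleted computeIndexGranularityImpl() logic for a
/// worked example; the real replacement lives in DB::computeIndexGranularity.
static size_t computeIndexGranularitySketch(
    size_t rows_in_block, size_t block_bytes,
    size_t index_granularity_bytes, size_t fixed_granularity_rows,
    bool blocks_are_granules, bool adaptive)
{
    size_t result;
    if (!adaptive)
        result = fixed_granularity_rows;
    else if (blocks_are_granules)
        result = rows_in_block;
    else if (block_bytes >= index_granularity_bytes)
        result = rows_in_block / (block_bytes / index_granularity_bytes);
    else
        result = index_granularity_bytes / std::max(block_bytes / rows_in_block, size_t{1});

    /// Stay at or below the fixed row granularity unless blocks are granules.
    if (!blocks_are_granules)
        result = std::min(fixed_granularity_rows, result);
    return std::max(result, size_t{1});
}

int main()
{
    /// 1M rows of ~100 bytes each with a 10 MiB granule budget: the byte-based
    /// estimate exceeds the fixed 8192-row cap, so the cap wins.
    std::printf("%zu\n", computeIndexGranularitySketch(1000000, 100000000, 10485760, 8192, false, true));
}
```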
@ -471,7 +423,7 @@ void MergeTreeDataPartWriterOnDisk::fillPrimaryIndexChecksums(MergeTreeData::Dat
{
bool write_final_mark = (with_final_mark && data_written);
if (write_final_mark && compute_granularity)
index_granularity.appendMark(0);
index_granularity->appendMark(0);

if (index_file_hashing_stream)
{

@ -123,7 +123,7 @@ public:
const String & marks_file_extension,
const CompressionCodecPtr & default_codec,
const MergeTreeWriterSettings & settings,
const MergeTreeIndexGranularity & index_granularity);
MergeTreeIndexGranularityPtr index_granularity_);

void setWrittenOffsetColumns(WrittenOffsetColumns * written_offset_columns_)
{

@ -99,13 +99,13 @@ MergeTreeDataPartWriterWide::MergeTreeDataPartWriterWide(
const String & marks_file_extension_,
const CompressionCodecPtr & default_codec_,
const MergeTreeWriterSettings & settings_,
const MergeTreeIndexGranularity & index_granularity_)
MergeTreeIndexGranularityPtr index_granularity_)
: MergeTreeDataPartWriterOnDisk(
data_part_name_, logger_name_, serializations_,
data_part_storage_, index_granularity_info_, storage_settings_,
columns_list_, metadata_snapshot_, virtual_columns_,
indices_to_recalc_, stats_to_recalc_, marks_file_extension_,
default_codec_, settings_, index_granularity_)
default_codec_, settings_, std::move(index_granularity_))
{
if (settings.save_marks_in_cache)
{
@ -238,8 +238,8 @@ void MergeTreeDataPartWriterWide::shiftCurrentMark(const Granules & granules_wri
if (settings.can_use_adaptive_granularity && settings.blocks_are_granules_size)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Incomplete granules are not allowed while blocks are granules size. "
"Mark number {} (rows {}), rows written in last mark {}, rows to write in last mark from block {} (from row {}), "
"total marks currently {}", last_granule.mark_number, index_granularity.getMarkRows(last_granule.mark_number),
rows_written_in_last_mark, last_granule.rows_to_write, last_granule.start_row, index_granularity.getMarksCount());
"total marks currently {}", last_granule.mark_number, index_granularity->getMarkRows(last_granule.mark_number),
rows_written_in_last_mark, last_granule.rows_to_write, last_granule.start_row, index_granularity->getMarksCount());

/// Shift forward except last granule
setCurrentMark(getCurrentMark() + granules_written.size() - 1);
@ -273,10 +273,15 @@ void MergeTreeDataPartWriterWide::write(const Block & block, const IColumn::Perm
/// but not in case of vertical part of vertical merge)
if (compute_granularity)
{
size_t index_granularity_for_block = computeIndexGranularity(block_to_write);
size_t index_granularity_for_block;
if (auto constant_granularity = index_granularity->getConstantGranularity())
index_granularity_for_block = *constant_granularity;
else
index_granularity_for_block = computeIndexGranularity(block_to_write);

if (rows_written_in_last_mark > 0)
{
size_t rows_left_in_last_mark = index_granularity.getMarkRows(getCurrentMark()) - rows_written_in_last_mark;
size_t rows_left_in_last_mark = index_granularity->getMarkRows(getCurrentMark()) - rows_written_in_last_mark;
/// Previous granularity was much bigger than our new block's
/// granularity let's adjust it, because we want add new
/// heavy-weight blocks into small old granule.
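The new fast path asks the granularity object whether it is constant before falling back to per-block computation. A sketch of the `std::optional`-based pattern; the body of `getConstantGranularity()` here is a stand-in, only its shape is taken from the diff:

``` cpp
#include <cstddef>
#include <cstdio>
#include <optional>

/// Stand-in: only the optional-returning shape is taken from the diff.
static std::optional<size_t> getConstantGranularitySketch(bool is_constant)
{
    if (is_constant)
        return 8192; /// assumed fixed rows per granule
    return std::nullopt;
}

int main()
{
    size_t granularity_for_block;
    if (auto constant = getConstantGranularitySketch(true))
        granularity_for_block = *constant; /// constant granularity: skip inspecting the block
    else
        granularity_for_block = 4096;      /// would call computeIndexGranularity(block) here
    std::printf("%zu\n", granularity_for_block);
}
```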
@ -294,7 +299,7 @@ void MergeTreeDataPartWriterWide::write(const Block & block, const IColumn::Perm
fillIndexGranularity(index_granularity_for_block, block_to_write.rows());
}

auto granules_to_write = getGranulesToWrite(index_granularity, block_to_write.rows(), getCurrentMark(), rows_written_in_last_mark);
auto granules_to_write = getGranulesToWrite(*index_granularity, block_to_write.rows(), getCurrentMark(), rows_written_in_last_mark);

auto offset_columns = written_offset_columns ? *written_offset_columns : WrittenOffsetColumns{};
Block primary_key_block;
@ -482,7 +487,7 @@ void MergeTreeDataPartWriterWide::writeColumn(
throw Exception(ErrorCodes::LOGICAL_ERROR,
"We have to add new mark for column, but already have non written mark. "
"Current mark {}, total marks {}, offset {}",
getCurrentMark(), index_granularity.getMarksCount(), rows_written_in_last_mark);
getCurrentMark(), index_granularity->getMarksCount(), rows_written_in_last_mark);
last_non_written_marks[name] = getCurrentMarksForColumn(name_and_type, column.getPtr(), offset_columns);
}

@ -502,7 +507,7 @@ void MergeTreeDataPartWriterWide::writeColumn(
throw Exception(ErrorCodes::LOGICAL_ERROR, "No mark was saved for incomplete granule for column {}", backQuoteIfNeed(name));

for (const auto & mark : marks_it->second)
flushMarkToFile(mark, index_granularity.getMarkRows(granule.mark_number));
flushMarkToFile(mark, index_granularity->getMarkRows(granule.mark_number));
last_non_written_marks.erase(marks_it);
}
}
@ -549,10 +554,10 @@ void MergeTreeDataPartWriterWide::validateColumnOfFixedSize(const NameAndTypePai

for (mark_num = 0; !mrk_in->eof(); ++mark_num)
{
if (mark_num > index_granularity.getMarksCount())
if (mark_num > index_granularity->getMarksCount())
throw Exception(ErrorCodes::LOGICAL_ERROR,
"Incorrect number of marks in memory {}, on disk (at least) {}",
index_granularity.getMarksCount(), mark_num + 1);
index_granularity->getMarksCount(), mark_num + 1);

readBinaryLittleEndian(offset_in_compressed_file, *mrk_in);
readBinaryLittleEndian(offset_in_decompressed_block, *mrk_in);
@ -583,10 +588,10 @@ void MergeTreeDataPartWriterWide::validateColumnOfFixedSize(const NameAndTypePai
throw Exception(ErrorCodes::LOGICAL_ERROR,
"Still have {} rows in bin stream, last mark #{}"
" index granularity size {}, last rows {}",
column->size(), mark_num, index_granularity.getMarksCount(), index_granularity_rows);
column->size(), mark_num, index_granularity->getMarksCount(), index_granularity_rows);
}

if (index_granularity_rows != index_granularity.getMarkRows(mark_num))
if (index_granularity_rows != index_granularity->getMarkRows(mark_num))
{
throw Exception(
ErrorCodes::LOGICAL_ERROR,
@ -594,8 +599,8 @@ void MergeTreeDataPartWriterWide::validateColumnOfFixedSize(const NameAndTypePai
" (compressed offset {}, decompressed offset {}), in-memory {}, on disk {}, total marks {}",
getDataPartStorage().getFullPath(),
mark_num, offset_in_compressed_file, offset_in_decompressed_block,
index_granularity.getMarkRows(mark_num), index_granularity_rows,
index_granularity.getMarksCount());
index_granularity->getMarkRows(mark_num), index_granularity_rows,
index_granularity->getMarksCount());
}

auto column = type->createColumn();
@ -630,7 +635,7 @@ void MergeTreeDataPartWriterWide::validateColumnOfFixedSize(const NameAndTypePai
ErrorCodes::LOGICAL_ERROR, "Incorrect mark rows for mark #{} (compressed offset {}, decompressed offset {}), "
"actually in bin file {}, in mrk file {}, total marks {}",
mark_num, offset_in_compressed_file, offset_in_decompressed_block, column->size(),
index_granularity.getMarkRows(mark_num), index_granularity.getMarksCount());
index_granularity->getMarkRows(mark_num), index_granularity->getMarksCount());
}
}

@ -638,7 +643,7 @@ void MergeTreeDataPartWriterWide::validateColumnOfFixedSize(const NameAndTypePai
throw Exception(ErrorCodes::LOGICAL_ERROR,
"Still have something in marks stream, last mark #{}"
" index granularity size {}, last rows {}",
mark_num, index_granularity.getMarksCount(), index_granularity_rows);
mark_num, index_granularity->getMarksCount(), index_granularity_rows);
if (!bin_in.eof())
{
auto column = type->createColumn();
@ -648,7 +653,7 @@ void MergeTreeDataPartWriterWide::validateColumnOfFixedSize(const NameAndTypePai
throw Exception(ErrorCodes::LOGICAL_ERROR,
"Still have {} rows in bin stream, last mark #{}"
" index granularity size {}, last rows {}",
column->size(), mark_num, index_granularity.getMarksCount(), index_granularity_rows);
column->size(), mark_num, index_granularity->getMarksCount(), index_granularity_rows);
}
}

@ -665,8 +670,8 @@ void MergeTreeDataPartWriterWide::fillDataChecksums(MergeTreeDataPartChecksums &
throw Exception(ErrorCodes::LOGICAL_ERROR,
"Incomplete granule is not allowed while blocks are granules size even for last granule. "
"Mark number {} (rows {}), rows written for last mark {}, total marks {}",
getCurrentMark(), index_granularity.getMarkRows(getCurrentMark()),
rows_written_in_last_mark, index_granularity.getMarksCount());
getCurrentMark(), index_granularity->getMarkRows(getCurrentMark()),
rows_written_in_last_mark, index_granularity->getMarksCount());

adjustLastMarkIfNeedAndFlushToDisk(rows_written_in_last_mark);
}
@ -796,16 +801,16 @@ static void fillIndexGranularityImpl(

void MergeTreeDataPartWriterWide::fillIndexGranularity(size_t index_granularity_for_block, size_t rows_in_block)
{
if (getCurrentMark() < index_granularity.getMarksCount() && getCurrentMark() != index_granularity.getMarksCount() - 1)
if (getCurrentMark() < index_granularity->getMarksCount() && getCurrentMark() != index_granularity->getMarksCount() - 1)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Trying to add marks, while current mark {}, but total marks {}",
getCurrentMark(), index_granularity.getMarksCount());
getCurrentMark(), index_granularity->getMarksCount());

size_t index_offset = 0;
if (rows_written_in_last_mark != 0)
index_offset = index_granularity.getLastMarkRows() - rows_written_in_last_mark;
index_offset = index_granularity->getLastMarkRows() - rows_written_in_last_mark;

fillIndexGranularityImpl(
index_granularity,
*index_granularity,
index_offset,
index_granularity_for_block,
rows_in_block);
@ -824,27 +829,26 @@ void MergeTreeDataPartWriterWide::adjustLastMarkIfNeedAndFlushToDisk(size_t new_
/// other columns
if (compute_granularity && settings.can_use_adaptive_granularity)
{
if (getCurrentMark() != index_granularity.getMarksCount() - 1)
if (getCurrentMark() != index_granularity->getMarksCount() - 1)
throw Exception(ErrorCodes::LOGICAL_ERROR,
"Non last mark {} (with {} rows) having rows offset {}, total marks {}",
getCurrentMark(), index_granularity.getMarkRows(getCurrentMark()),
rows_written_in_last_mark, index_granularity.getMarksCount());
getCurrentMark(), index_granularity->getMarkRows(getCurrentMark()),
rows_written_in_last_mark, index_granularity->getMarksCount());

index_granularity.popMark();
index_granularity.appendMark(new_rows_in_last_mark);
index_granularity->adjustLastMark(new_rows_in_last_mark);
}

/// Last mark should be filled, otherwise it's a bug
if (last_non_written_marks.empty())
throw Exception(ErrorCodes::LOGICAL_ERROR, "No saved marks for last mark {} having rows offset {}, total marks {}",
getCurrentMark(), rows_written_in_last_mark, index_granularity.getMarksCount());
getCurrentMark(), rows_written_in_last_mark, index_granularity->getMarksCount());

if (rows_written_in_last_mark == new_rows_in_last_mark)
{
for (const auto & [name, marks] : last_non_written_marks)
{
for (const auto & mark : marks)
flushMarkToFile(mark, index_granularity.getMarkRows(getCurrentMark()));
flushMarkToFile(mark, index_granularity->getMarkRows(getCurrentMark()));
}

last_non_written_marks.clear();
@ -37,7 +37,7 @@ public:
const String & marks_file_extension,
const CompressionCodecPtr & default_codec,
const MergeTreeWriterSettings & settings,
const MergeTreeIndexGranularity & index_granularity);
MergeTreeIndexGranularityPtr index_granularity_);

void write(const Block & block, const IColumn::Permutation * permutation) override;
@ -129,7 +129,7 @@ size_t MergeTreeDataSelectExecutor::getApproximateTotalRowsToRead(
{
MarkRanges part_ranges = markRangesFromPKRange(part, metadata_snapshot, key_condition, {}, &exact_ranges, settings, log);
for (const auto & range : part_ranges)
rows_count += part->index_granularity.getRowsCountInRange(range);
rows_count += part->index_granularity->getRowsCountInRange(range);
}
UNUSED(exact_ranges);

@ -688,7 +688,7 @@ RangesInDataParts MergeTreeDataSelectExecutor::filterPartsByPrimaryKeyAndSkipInd
auto & part = parts[part_index];

RangesInDataPart ranges(part, part_index);
size_t total_marks_count = part->index_granularity.getMarksCountWithoutFinal();
size_t total_marks_count = part->index_granularity->getMarksCountWithoutFinal();

if (metadata_snapshot->hasPrimaryKey() || part_offset_condition)
{

@ -1044,11 +1044,11 @@ MarkRanges MergeTreeDataSelectExecutor::markRangesFromPKRange(
{
MarkRanges res;

size_t marks_count = part->index_granularity.getMarksCount();
size_t marks_count = part->index_granularity->getMarksCount();
if (marks_count == 0)
return res;

bool has_final_mark = part->index_granularity.hasFinalMark();
bool has_final_mark = part->index_granularity->hasFinalMark();

bool key_condition_useful = !key_condition.alwaysUnknownOrTrue();
bool part_offset_condition_useful = part_offset_condition && !part_offset_condition->alwaysUnknownOrTrue();

@ -1160,16 +1160,16 @@ MarkRanges MergeTreeDataSelectExecutor::markRangesFromPKRange(

auto check_part_offset_condition = [&]()
{
auto begin = part->index_granularity.getMarkStartingRow(range.begin);
auto end = part->index_granularity.getMarkStartingRow(range.end) - 1;
auto begin = part->index_granularity->getMarkStartingRow(range.begin);
auto end = part->index_granularity->getMarkStartingRow(range.end) - 1;
if (begin > end)
{
/// Empty mark (final mark)
return BoolMask(false, true);
}

part_offset_left[0] = part->index_granularity.getMarkStartingRow(range.begin);
part_offset_right[0] = part->index_granularity.getMarkStartingRow(range.end) - 1;
part_offset_left[0] = part->index_granularity->getMarkStartingRow(range.begin);
part_offset_right[0] = part->index_granularity->getMarkStartingRow(range.end) - 1;
part_offset_left[1] = part->name;
part_offset_right[1] = part->name;

@ -1381,9 +1381,8 @@ MarkRanges MergeTreeDataSelectExecutor::filterMarksUsingIndex(
part->index_granularity_info.fixed_index_granularity,
part->index_granularity_info.index_granularity_bytes);

size_t marks_count = part->getMarksCount();
size_t final_mark = part->index_granularity.hasFinalMark();
size_t index_marks_count = (marks_count - final_mark + index_granularity - 1) / index_granularity;
size_t marks_count = part->index_granularity->getMarksCountWithoutFinal();
size_t index_marks_count = (marks_count + index_granularity - 1) / index_granularity;

MarkRanges index_ranges;
for (const auto & range : ranges)

@ -1431,8 +1430,7 @@ MarkRanges MergeTreeDataSelectExecutor::filterMarksUsingIndex(

for (auto row : rows)
{
const MergeTreeIndexGranularity & merge_tree_index_granularity = part->index_granularity;
size_t num_marks = merge_tree_index_granularity.countMarksForRows(index_mark * index_granularity, row);
size_t num_marks = part->index_granularity->countMarksForRows(index_mark * index_granularity, row);

MarkRange data_range(
std::max(ranges[i].begin, (index_mark * index_granularity) + num_marks),

@ -1505,9 +1503,8 @@ MarkRanges MergeTreeDataSelectExecutor::filterMarksUsingMergedIndex(
part->index_granularity_info.fixed_index_granularity,
part->index_granularity_info.index_granularity_bytes);

size_t marks_count = part->getMarksCount();
size_t final_mark = part->index_granularity.hasFinalMark();
size_t index_marks_count = (marks_count - final_mark + index_granularity - 1) / index_granularity;
size_t marks_count = part->index_granularity->getMarksCountWithoutFinal();
size_t index_marks_count = (marks_count + index_granularity - 1) / index_granularity;

std::vector<std::unique_ptr<MergeTreeIndexReader>> readers;
for (const auto & index_helper : indices)

@ -1607,9 +1604,7 @@ void MergeTreeDataSelectExecutor::selectPartsToRead(
continue;
}

size_t num_granules = part->getMarksCount();
if (num_granules && part->index_granularity.hasFinalMark())
--num_granules;
size_t num_granules = part->index_granularity->getMarksCountWithoutFinal();

counters.num_initial_selected_parts += 1;
counters.num_initial_selected_granules += num_granules;

@ -1676,9 +1671,7 @@ void MergeTreeDataSelectExecutor::selectPartsToReadWithUUIDFilter(
if (part->uuid != UUIDHelpers::Nil && ignored_part_uuids->has(part->uuid))
continue;

size_t num_granules = part->getMarksCount();
if (num_granules && part->index_granularity.hasFinalMark())
--num_granules;
size_t num_granules = part->index_granularity->getMarksCountWithoutFinal();

counters.num_initial_selected_parts += 1;
counters.num_initial_selected_granules += num_granules;
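The select-executor changes above replace the hand-rolled final-mark subtraction with a single `getMarksCountWithoutFinal()` call. A minimal standalone sketch of the invariant — a plain `std::vector` of invented partial sums stands in for the real class:

``` cpp
#include <cassert>
#include <cstddef>
#include <vector>

int main()
{
    /// Partial sums of rows per mark; a repeated trailing value means the
    /// final mark carries zero rows.
    std::vector<size_t> sums = {8192, 16384, 20000, 20000};

    bool has_final_mark = sums.size() >= 2 && sums.back() == sums[sums.size() - 2];
    size_t marks_without_final = sums.size() - (has_final_mark ? 1 : 0);

    assert(marks_without_final == 3); /// same value the old `--num_granules` produced
}
```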
@ -686,6 +686,13 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeTempPartImpl(
auto compression_codec = data.getContext()->chooseCompressionCodec(0, 0);
bool save_marks_in_cache = (*data_settings)[MergeTreeSetting::prewarm_mark_cache] && data.getContext()->getMarkCache();

auto index_granularity_ptr = createMergeTreeIndexGranularity(
block.rows(),
block.bytes(),
*data.getSettings(),
new_data_part->index_granularity_info,
/*blocks_are_granules=*/ false);

auto out = std::make_unique<MergedBlockOutputStream>(
new_data_part,
metadata_snapshot,

@ -693,6 +700,7 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeTempPartImpl(
indices,
statistics,
compression_codec,
std::move(index_granularity_ptr),
context->getCurrentTransaction() ? context->getCurrentTransaction()->tid : Tx::PrehistoricTID,
/*reset_columns=*/ false,
save_marks_in_cache,

@ -833,6 +841,13 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeProjectionPartImpl(
auto compression_codec = data.getContext()->chooseCompressionCodec(0, 0);
bool save_marks_in_cache = (*data.getSettings())[MergeTreeSetting::prewarm_mark_cache] && data.getContext()->getMarkCache();

auto index_granularity_ptr = createMergeTreeIndexGranularity(
block.rows(),
block.bytes(),
*data.getSettings(),
new_data_part->index_granularity_info,
/*blocks_are_granules=*/ false);

auto out = std::make_unique<MergedBlockOutputStream>(
new_data_part,
metadata_snapshot,

@ -841,6 +856,7 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeProjectionPartImpl(
/// TODO(hanfei): It should be helpful to write statistics for projection result.
ColumnsStatistics{},
compression_codec,
std::move(index_granularity_ptr),
Tx::PrehistoricTID,
/*reset_columns=*/ false,
save_marks_in_cache,
@ -1,77 +1,23 @@
#include <Storages/MergeTree/MergeTreeIndexGranularity.h>
#include <Common/Exception.h>
#include <Storages/MergeTree/MergeTreeIndexGranularityAdaptive.h>
#include <Storages/MergeTree/MergeTreeIndexGranularityConstant.h>
#include <Storages/MergeTree/MergeTreeIndexGranularityInfo.h>
#include <Storages/MergeTree/MergeTreeSettings.h>
#include <IO/WriteHelpers.h>


namespace DB
{

namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
}

MergeTreeIndexGranularity::MergeTreeIndexGranularity(const std::vector<size_t> & marks_rows_partial_sums_)
: marks_rows_partial_sums(marks_rows_partial_sums_)
namespace MergeTreeSetting
{
}

/// Rows after mark to next mark
size_t MergeTreeIndexGranularity::getMarkRows(size_t mark_index) const
{
if (mark_index >= getMarksCount())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Trying to get non existing mark {}, while size is {}", mark_index, getMarksCount());
if (mark_index == 0)
return marks_rows_partial_sums[0];
return marks_rows_partial_sums[mark_index] - marks_rows_partial_sums[mark_index - 1];
}

size_t MergeTreeIndexGranularity::getMarkStartingRow(size_t mark_index) const
{
if (mark_index == 0)
return 0;
return marks_rows_partial_sums[mark_index - 1];
}

size_t MergeTreeIndexGranularity::getMarksCount() const
{
return marks_rows_partial_sums.size();
}

size_t MergeTreeIndexGranularity::getTotalRows() const
{
if (marks_rows_partial_sums.empty())
return 0;
return marks_rows_partial_sums.back();
}

void MergeTreeIndexGranularity::appendMark(size_t rows_count)
{
if (marks_rows_partial_sums.empty())
marks_rows_partial_sums.push_back(rows_count);
else
marks_rows_partial_sums.push_back(marks_rows_partial_sums.back() + rows_count);
}

void MergeTreeIndexGranularity::addRowsToLastMark(size_t rows_count)
{
if (marks_rows_partial_sums.empty())
marks_rows_partial_sums.push_back(rows_count);
else
marks_rows_partial_sums.back() += rows_count;
}

void MergeTreeIndexGranularity::popMark()
{
if (!marks_rows_partial_sums.empty())
marks_rows_partial_sums.pop_back();
}

size_t MergeTreeIndexGranularity::getRowsCountInRange(size_t begin, size_t end) const
{
size_t subtrahend = 0;
if (begin != 0)
subtrahend = marks_rows_partial_sums[begin - 1];
return marks_rows_partial_sums[end - 1] - subtrahend;
extern const MergeTreeSettingsUInt64 index_granularity;
extern const MergeTreeSettingsUInt64 index_granularity_bytes;
extern const MergeTreeSettingsBool use_const_adaptive_granularity;
}

size_t MergeTreeIndexGranularity::getRowsCountInRange(const MarkRange & range) const

@ -87,55 +33,118 @@ size_t MergeTreeIndexGranularity::getRowsCountInRanges(const MarkRanges & ranges
return total;
}

size_t MergeTreeIndexGranularity::countMarksForRows(size_t from_mark, size_t number_of_rows) const
size_t MergeTreeIndexGranularity::getMarksCountWithoutFinal() const
{
size_t rows_before_mark = getMarkStartingRow(from_mark);
size_t last_row_pos = rows_before_mark + number_of_rows;
auto it = std::upper_bound(marks_rows_partial_sums.begin(), marks_rows_partial_sums.end(), last_row_pos);
size_t to_mark = it - marks_rows_partial_sums.begin();
return to_mark - from_mark;
size_t total = getMarksCount();
if (total == 0)
return total;
return total - hasFinalMark();
}

size_t MergeTreeIndexGranularity::countRowsForRows(size_t from_mark, size_t number_of_rows, size_t offset_in_rows) const
size_t MergeTreeIndexGranularity::getMarkStartingRow(size_t mark_index) const
{
size_t rows_before_mark = getMarkStartingRow(from_mark);
size_t last_row_pos = rows_before_mark + offset_in_rows + number_of_rows;
auto it = std::upper_bound(marks_rows_partial_sums.begin(), marks_rows_partial_sums.end(), last_row_pos);
size_t to_mark = it - marks_rows_partial_sums.begin();

return getRowsCountInRange(from_mark, std::max(1UL, to_mark)) - offset_in_rows;
return getRowsCountInRange(0, mark_index);
}

void MergeTreeIndexGranularity::resizeWithFixedGranularity(size_t size, size_t fixed_granularity)
size_t MergeTreeIndexGranularity::getLastMarkRows() const
{
marks_rows_partial_sums.resize(size);
return getMarkRows(getMarksCount() - 1);
}

size_t prev = 0;
for (size_t i = 0; i < size; ++i)
size_t MergeTreeIndexGranularity::getLastNonFinalMarkRows() const
{
size_t last_mark_rows = getMarkRows(getMarksCount() - 1);
if (last_mark_rows != 0)
return last_mark_rows;
return getMarkRows(getMarksCount() - 2);
}

void MergeTreeIndexGranularity::addRowsToLastMark(size_t rows_count)
{
if (hasFinalMark())
{
marks_rows_partial_sums[i] = fixed_granularity + prev;
prev = marks_rows_partial_sums[i];
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot add rows to final mark");
}
else if (empty())
{
appendMark(rows_count);
}
else
{
adjustLastMark(getLastMarkRows() + rows_count);
}
}

std::string MergeTreeIndexGranularity::describe() const
size_t computeIndexGranularity(
size_t rows,
size_t bytes_uncompressed,
size_t index_granularity_bytes,
size_t fixed_index_granularity_rows,
bool blocks_are_granules,
bool can_use_adaptive_index_granularity)
{
return fmt::format("initialized: {}, marks_rows_partial_sums: [{}]", initialized, fmt::join(marks_rows_partial_sums, ", "));
size_t index_granularity_for_block;

if (!can_use_adaptive_index_granularity)
{
index_granularity_for_block = fixed_index_granularity_rows;
}
else
{
if (blocks_are_granules)
{
index_granularity_for_block = rows;
}
else if (bytes_uncompressed >= index_granularity_bytes)
{
size_t granules_in_block = bytes_uncompressed / index_granularity_bytes;
index_granularity_for_block = rows / granules_in_block;
}
else
{
size_t size_of_row_in_bytes = std::max(bytes_uncompressed / rows, 1UL);
index_granularity_for_block = index_granularity_bytes / size_of_row_in_bytes;
}
}

/// We should be less than or equal to the fixed index granularity.
/// But if the block size is a granule size then do not adjust it.
/// Granularity greater than the fixed granularity might come from a compact part.
if (!blocks_are_granules)
index_granularity_for_block = std::min(fixed_index_granularity_rows, index_granularity_for_block);

/// Very rare case when index granularity bytes is less than a single row.
if (index_granularity_for_block == 0)
index_granularity_for_block = 1;

return index_granularity_for_block;
}

void MergeTreeIndexGranularity::shrinkToFitInMemory()
MergeTreeIndexGranularityPtr createMergeTreeIndexGranularity(
size_t rows,
size_t bytes_uncompressed,
const MergeTreeSettings & settings,
const MergeTreeIndexGranularityInfo & info,
bool blocks_are_granules)
{
marks_rows_partial_sums.shrink_to_fit();
bool use_adaptive_granularity = info.mark_type.adaptive;
bool use_const_adaptive_granularity = settings[MergeTreeSetting::use_const_adaptive_granularity];
bool is_compact_part = info.mark_type.part_type == MergeTreeDataPartType::Compact;

/// Compact parts cannot work without adaptive granularity.
/// If the part is empty, create adaptive granularity because constant granularity doesn't support this corner case.
if (rows == 0 || blocks_are_granules || is_compact_part || (use_adaptive_granularity && !use_const_adaptive_granularity))
return std::make_shared<MergeTreeIndexGranularityAdaptive>();

size_t computed_granularity = computeIndexGranularity(
rows,
bytes_uncompressed,
settings[MergeTreeSetting::index_granularity_bytes],
settings[MergeTreeSetting::index_granularity],
blocks_are_granules,
use_adaptive_granularity);

return std::make_shared<MergeTreeIndexGranularityConstant>(computed_granularity);
}

uint64_t MergeTreeIndexGranularity::getBytesSize() const
{
return marks_rows_partial_sums.size() * sizeof(size_t);
}
uint64_t MergeTreeIndexGranularity::getBytesAllocated() const
{
return marks_rows_partial_sums.capacity() * sizeof(size_t);
}

}
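To make the adaptive branch of `computeIndexGranularity` above concrete, here is a worked example with invented numbers (8000 rows, 500 bytes per row, a 10 MB byte budget), compiled standalone:

``` cpp
#include <algorithm>
#include <cassert>
#include <cstddef>

int main()
{
    size_t rows = 8000;
    size_t bytes_uncompressed = 4'000'000;        /// 500 bytes per row
    size_t index_granularity_bytes = 10'000'000;  /// block is smaller than one target granule
    size_t fixed_index_granularity_rows = 8192;

    size_t size_of_row_in_bytes = std::max(bytes_uncompressed / rows, 1UL); /// 500
    size_t granularity = index_granularity_bytes / size_of_row_in_bytes;    /// 20000
    granularity = std::min(fixed_index_granularity_rows, granularity);      /// capped at 8192

    assert(granularity == 8192);
}
```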
@ -1,35 +1,28 @@
#pragma once
#include <vector>
#include <optional>
#include <Storages/MergeTree/MarkRange.h>

namespace DB
{

/// Class contains information about index granularity in rows of IMergeTreeDataPart
/// Inside it contains vector of partial sums of rows after mark:
/// |-----|---|----|----|
/// | 5 | 8 | 12 | 16 |
/// If user doesn't specify setting index_granularity_bytes for MergeTree* table
/// all values in inner vector would have constant stride (default 8192).
/// Class that contains information about index granularity in rows of IMergeTreeDataPart
class MergeTreeIndexGranularity
{
private:
std::vector<size_t> marks_rows_partial_sums;
bool initialized = false;

public:
MergeTreeIndexGranularity() = default;
explicit MergeTreeIndexGranularity(const std::vector<size_t> & marks_rows_partial_sums_);
virtual ~MergeTreeIndexGranularity() = default;

/// Returns granularity if it is constant for the whole part (except the last granule).
virtual std::optional<size_t> getConstantGranularity() const = 0;
/// Return count of rows between marks
virtual size_t getRowsCountInRange(size_t begin, size_t end) const = 0;
/// Return count of rows between marks
size_t getRowsCountInRange(const MarkRange & range) const;
/// Return count of rows between marks
size_t getRowsCountInRange(size_t begin, size_t end) const;
/// Return sum of rows between all ranges
size_t getRowsCountInRanges(const MarkRanges & ranges) const;

/// Return the number of marks, starting from `from_mark`, that contain `number_of_rows`
size_t countMarksForRows(size_t from_mark, size_t number_of_rows) const;
virtual size_t countMarksForRows(size_t from_mark, size_t number_of_rows) const = 0;

/// Return the number of rows, starting from `from_mark`, that contains `number_of_rows`
/// and possibly some offset_in_rows from `from_mark`
@ -37,74 +30,65 @@ public:
/// |-----|---------------------------|----|----|
/// ^------------------------^-----------^
/// from_mark offset_in_rows number_of_rows
size_t countRowsForRows(size_t from_mark, size_t number_of_rows, size_t offset_in_rows) const;
virtual size_t countRowsForRows(size_t from_mark, size_t number_of_rows, size_t offset_in_rows) const = 0;

/// Total marks
size_t getMarksCount() const;
virtual size_t getMarksCount() const = 0;
/// Total rows
size_t getTotalRows() const;
virtual size_t getTotalRows() const = 0;

/// Total number of marks without the final mark if it exists
size_t getMarksCountWithoutFinal() const { return getMarksCount() - hasFinalMark(); }
size_t getMarksCountWithoutFinal() const;

/// Rows after mark to next mark
size_t getMarkRows(size_t mark_index) const;
virtual size_t getMarkRows(size_t mark_index) const = 0;

/// Return amount of rows before mark
size_t getMarkStartingRow(size_t mark_index) const;

/// Amount of rows after last mark
size_t getLastMarkRows() const
{
size_t last = marks_rows_partial_sums.size() - 1;
return getMarkRows(last);
}
size_t getLastMarkRows() const;

size_t getLastNonFinalMarkRows() const
{
size_t last_mark_rows = getLastMarkRows();
if (last_mark_rows != 0)
return last_mark_rows;
return getMarkRows(marks_rows_partial_sums.size() - 2);
}
/// Amount of rows after the last non-final mark
size_t getLastNonFinalMarkRows() const;

bool hasFinalMark() const
{
return getLastMarkRows() == 0;
}
virtual bool hasFinalMark() const = 0;
bool empty() const { return getMarksCount() == 0; }

bool empty() const
{
return marks_rows_partial_sums.empty();
}
/// Add new mark with rows_count.
virtual void appendMark(size_t rows_count) = 0;

bool isInitialized() const
{
return initialized;
}

void setInitialized()
{
initialized = true;
}
/// Add new mark with rows_count
void appendMark(size_t rows_count);

/// Extends last mark by rows_count.
/// Sets last mark equal to rows_count.
virtual void adjustLastMark(size_t rows_count) = 0;
void addRowsToLastMark(size_t rows_count);

/// Drops last mark if any exists.
void popMark();
virtual uint64_t getBytesSize() const = 0;
virtual uint64_t getBytesAllocated() const = 0;

/// Add `size` of marks with `fixed_granularity` rows
void resizeWithFixedGranularity(size_t size, size_t fixed_granularity);

std::string describe() const;

void shrinkToFitInMemory();

uint64_t getBytesSize() const;
uint64_t getBytesAllocated() const;
/// Possibly optimizes values in memory (for example, to a constant value).
/// Returns a new optimized index granularity structure, or nullptr if optimization is not applicable.
virtual std::shared_ptr<MergeTreeIndexGranularity> optimize() = 0;
virtual std::string describe() const = 0;
};

using MergeTreeIndexGranularityPtr = std::shared_ptr<MergeTreeIndexGranularity>;

size_t computeIndexGranularity(
size_t rows,
size_t bytes_uncompressed,
size_t index_granularity_bytes,
size_t fixed_index_granularity_rows,
bool blocks_are_granules,
bool can_use_adaptive_index_granularity);

struct MergeTreeSettings;
struct MergeTreeIndexGranularityInfo;

MergeTreeIndexGranularityPtr createMergeTreeIndexGranularity(
size_t rows,
size_t bytes_uncompressed,
const MergeTreeSettings & settings,
const MergeTreeIndexGranularityInfo & info,
bool blocks_are_granules);

}
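The header now defines a pure interface plus a `shared_ptr` alias, so call sites depend only on virtual methods. A compilable toy analogue of that shape (the names here are illustrative, not the real ClickHouse types):

``` cpp
#include <cassert>
#include <cstddef>
#include <memory>
#include <optional>

struct Granularity
{
    virtual ~Granularity() = default;
    virtual std::optional<size_t> getConstantGranularity() const = 0;
    virtual size_t getMarkRows(size_t mark_index) const = 0;
};

struct ConstantGranularity final : Granularity
{
    size_t granularity;
    explicit ConstantGranularity(size_t granularity_) : granularity(granularity_) {}
    std::optional<size_t> getConstantGranularity() const override { return granularity; }
    size_t getMarkRows(size_t) const override { return granularity; }
};

int main()
{
    std::shared_ptr<Granularity> granularity = std::make_shared<ConstantGranularity>(8192);
    assert(granularity->getConstantGranularity().value() == 8192);
    assert(granularity->getMarkRows(5) == 8192); /// callers never see the concrete type
}
```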
152 src/Storages/MergeTree/MergeTreeIndexGranularityAdaptive.cpp Normal file
@ -0,0 +1,152 @@
#include <Storages/MergeTree/MergeTreeIndexGranularityAdaptive.h>
#include <Storages/MergeTree/MergeTreeIndexGranularityConstant.h>


namespace DB
{

namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
}

MergeTreeIndexGranularityAdaptive::MergeTreeIndexGranularityAdaptive(const std::vector<size_t> & marks_rows_partial_sums_)
: marks_rows_partial_sums(marks_rows_partial_sums_)
{
}

/// Rows after mark to next mark
size_t MergeTreeIndexGranularityAdaptive::getMarkRows(size_t mark_index) const
{
if (mark_index >= getMarksCount())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Trying to get non existing mark {}, while size is {}", mark_index, getMarksCount());

if (mark_index == 0)
return marks_rows_partial_sums[0];

return marks_rows_partial_sums[mark_index] - marks_rows_partial_sums[mark_index - 1];
}

bool MergeTreeIndexGranularityAdaptive::hasFinalMark() const
{
if (marks_rows_partial_sums.empty())
return false;
return getLastMarkRows() == 0;
}

size_t MergeTreeIndexGranularityAdaptive::getMarksCount() const
{
return marks_rows_partial_sums.size();
}

size_t MergeTreeIndexGranularityAdaptive::getTotalRows() const
{
if (marks_rows_partial_sums.empty())
return 0;
return marks_rows_partial_sums.back();
}

void MergeTreeIndexGranularityAdaptive::appendMark(size_t rows_count)
{
if (hasFinalMark())
{
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot append mark after final");
}
else if (marks_rows_partial_sums.empty())
{
marks_rows_partial_sums.push_back(rows_count);
}
else
{
marks_rows_partial_sums.push_back(marks_rows_partial_sums.back() + rows_count);
}
}

void MergeTreeIndexGranularityAdaptive::adjustLastMark(size_t rows_count)
{
if (hasFinalMark())
{
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot adjust final mark");
}
else if (marks_rows_partial_sums.empty())
{
marks_rows_partial_sums.push_back(rows_count);
}
else
{
marks_rows_partial_sums.pop_back();
appendMark(rows_count);
}
}

size_t MergeTreeIndexGranularityAdaptive::getRowsCountInRange(size_t begin, size_t end) const
{
if (end > getMarksCount())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Trying to get marks in range [{}; {}), while size is {}", begin, end, getMarksCount());

if (end == 0)
return 0;

size_t subtrahend = 0;
if (begin != 0)
subtrahend = marks_rows_partial_sums[begin - 1];

return marks_rows_partial_sums[end - 1] - subtrahend;
}

size_t MergeTreeIndexGranularityAdaptive::countMarksForRows(size_t from_mark, size_t number_of_rows) const
{
size_t rows_before_mark = getMarkStartingRow(from_mark);
size_t last_row_pos = rows_before_mark + number_of_rows;
auto it = std::upper_bound(marks_rows_partial_sums.begin(), marks_rows_partial_sums.end(), last_row_pos);
size_t to_mark = it - marks_rows_partial_sums.begin();
return to_mark - from_mark;
}

size_t MergeTreeIndexGranularityAdaptive::countRowsForRows(size_t from_mark, size_t number_of_rows, size_t offset_in_rows) const
{
size_t rows_before_mark = getMarkStartingRow(from_mark);
size_t last_row_pos = rows_before_mark + offset_in_rows + number_of_rows;
auto it = std::upper_bound(marks_rows_partial_sums.begin(), marks_rows_partial_sums.end(), last_row_pos);
size_t to_mark = it - marks_rows_partial_sums.begin();

return getRowsCountInRange(from_mark, std::max(1UL, to_mark)) - offset_in_rows;
}

uint64_t MergeTreeIndexGranularityAdaptive::getBytesSize() const
{
return marks_rows_partial_sums.size() * sizeof(size_t);
}

uint64_t MergeTreeIndexGranularityAdaptive::getBytesAllocated() const
{
return marks_rows_partial_sums.capacity() * sizeof(size_t);
}

std::shared_ptr<MergeTreeIndexGranularity> MergeTreeIndexGranularityAdaptive::optimize()
{
size_t marks_count = getMarksCountWithoutFinal();
if (marks_count == 0)
return nullptr;

size_t first_mark = getMarkRows(0);
for (size_t i = 1; i < marks_count - 1; ++i)
{
if (getMarkRows(i) != first_mark)
{
/// We cannot optimize to constant but at least optimize memory usage.
marks_rows_partial_sums.shrink_to_fit();
return nullptr;
}
}

size_t last_mark = getMarkRows(marks_count - 1);
return std::make_shared<MergeTreeIndexGranularityConstant>(first_mark, last_mark, marks_count, hasFinalMark());
}

std::string MergeTreeIndexGranularityAdaptive::describe() const
{
return fmt::format("Adaptive(marks_rows_partial_sums: [{}])", fmt::join(marks_rows_partial_sums, ", "));
}

}
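The adaptive implementation keeps one partial sum per mark. A standalone restatement of its accessors for the documented example {5, 8, 12, 16} (marks of 5, 3, 4 and 4 rows):

``` cpp
#include <cassert>
#include <cstddef>
#include <vector>

int main()
{
    std::vector<size_t> sums = {5, 8, 12, 16};

    /// Same arithmetic as MergeTreeIndexGranularityAdaptive::getMarkRows().
    auto mark_rows = [&](size_t i) { return i == 0 ? sums[0] : sums[i] - sums[i - 1]; };

    assert(mark_rows(0) == 5);  /// getMarkRows(0)
    assert(mark_rows(2) == 4);  /// getMarkRows(2)
    assert(sums.back() == 16);  /// getTotalRows()
}
```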
42 src/Storages/MergeTree/MergeTreeIndexGranularityAdaptive.h Normal file
@ -0,0 +1,42 @@
#pragma once
#include <Storages/MergeTree/MergeTreeIndexGranularity.h>

namespace DB
{

/// Class that stores adaptive index granularity.
/// Inside it contains vector of partial sums of rows after mark:
/// |-----|---|----|----|
/// | 5 | 8 | 12 | 16 |
class MergeTreeIndexGranularityAdaptive final : public MergeTreeIndexGranularity
{
public:
MergeTreeIndexGranularityAdaptive() = default;
explicit MergeTreeIndexGranularityAdaptive(const std::vector<size_t> & marks_rows_partial_sums_);

std::optional<size_t> getConstantGranularity() const override { return {}; }
size_t getRowsCountInRange(size_t begin, size_t end) const override;
size_t countMarksForRows(size_t from_mark, size_t number_of_rows) const override;
size_t countRowsForRows(size_t from_mark, size_t number_of_rows, size_t offset_in_rows) const override;

size_t getMarksCount() const override;
size_t getTotalRows() const override;

size_t getMarkRows(size_t mark_index) const override;
bool hasFinalMark() const override;

void appendMark(size_t rows_count) override;
void adjustLastMark(size_t rows_count) override;

uint64_t getBytesSize() const override;
uint64_t getBytesAllocated() const override;

std::shared_ptr<MergeTreeIndexGranularity> optimize() override;
std::string describe() const override;

private:
std::vector<size_t> marks_rows_partial_sums;
};

}
143 src/Storages/MergeTree/MergeTreeIndexGranularityConstant.cpp Normal file
@ -0,0 +1,143 @@
#include <Storages/MergeTree/MergeTreeIndexGranularityConstant.h>

namespace DB
{

namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
}

MergeTreeIndexGranularityConstant::MergeTreeIndexGranularityConstant(size_t constant_granularity_)
: constant_granularity(constant_granularity_)
, last_mark_granularity(constant_granularity_)
{
}

MergeTreeIndexGranularityConstant::MergeTreeIndexGranularityConstant(size_t constant_granularity_, size_t last_mark_granularity_, size_t num_marks_without_final_, bool has_final_mark_)
: constant_granularity(constant_granularity_)
, last_mark_granularity(last_mark_granularity_)
, num_marks_without_final(num_marks_without_final_)
, has_final_mark(has_final_mark_)
{
}

/// Rows after mark to next mark
size_t MergeTreeIndexGranularityConstant::getMarkRows(size_t mark_index) const
{
if (mark_index >= getMarksCount())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Trying to get non existing mark {}, while size is {}", mark_index, getMarksCount());

if (mark_index + 1 < num_marks_without_final)
return constant_granularity;

if (mark_index + 1 == num_marks_without_final)
return last_mark_granularity;

return 0; // Final mark.
}

size_t MergeTreeIndexGranularityConstant::getMarksCount() const
{
return num_marks_without_final + has_final_mark;
}

size_t MergeTreeIndexGranularityConstant::getTotalRows() const
{
if (num_marks_without_final == 0)
return 0;

return constant_granularity * (num_marks_without_final - 1) + last_mark_granularity;
}

void MergeTreeIndexGranularityConstant::appendMark(size_t rows_count)
{
if (has_final_mark)
{
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot append mark after final");
}
else if (rows_count == 0)
{
has_final_mark = true;
}
else if (rows_count != constant_granularity)
{
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot append mark with {} rows. Granularity is constant ({})", rows_count, constant_granularity);
}
else
{
++num_marks_without_final;
}
}

void MergeTreeIndexGranularityConstant::adjustLastMark(size_t rows_count)
{
if (has_final_mark)
{
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot adjust final mark");
}
else
{
if (num_marks_without_final == 0)
++num_marks_without_final;

last_mark_granularity = rows_count;
}
}

size_t MergeTreeIndexGranularityConstant::getRowsCountInRange(size_t begin, size_t end) const
{
if (end > getMarksCount())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Trying to get marks in range [{}; {}), while size is {}", begin, end, getMarksCount());

if (end == 0)
return 0;

size_t total_rows = 0;
if (end >= num_marks_without_final)
{
total_rows += last_mark_granularity;
end = num_marks_without_final - 1;
}

total_rows += constant_granularity * (end - begin);
return total_rows;
}

size_t MergeTreeIndexGranularityConstant::getMarkUpperBoundForRow(size_t row_index) const
{
size_t num_rows_with_constant_granularity = (num_marks_without_final - 1) * constant_granularity;

if (row_index >= getTotalRows())
return getMarksCount();

if (row_index >= num_rows_with_constant_granularity)
return num_marks_without_final - 1;

return row_index / constant_granularity;
}

size_t MergeTreeIndexGranularityConstant::countMarksForRows(size_t from_mark, size_t number_of_rows) const
{
size_t rows_before_mark = getMarkStartingRow(from_mark);
size_t last_row_pos = rows_before_mark + number_of_rows;

return getMarkUpperBoundForRow(last_row_pos) - from_mark;
}

size_t MergeTreeIndexGranularityConstant::countRowsForRows(size_t from_mark, size_t number_of_rows, size_t offset_in_rows) const
{
size_t rows_before_mark = getMarkStartingRow(from_mark);
size_t last_row_pos = rows_before_mark + offset_in_rows + number_of_rows;

return getRowsCountInRange(from_mark, std::max(1UL, getMarkUpperBoundForRow(last_row_pos))) - offset_in_rows;
}

std::string MergeTreeIndexGranularityConstant::describe() const
{
return fmt::format(
"Constant(constant_granularity: {}, last_mark_granularity: {}, num_marks_without_final: {}, has_final_mark: {})",
constant_granularity, last_mark_granularity, num_marks_without_final, has_final_mark);
}

}
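The constant implementation stores just three integers and a flag, so its accessors reduce to plain arithmetic. A standalone check with invented values (granularity 8192, a short 100-row last mark, four non-final marks):

``` cpp
#include <cassert>
#include <cstddef>

int main()
{
    size_t constant_granularity = 8192;
    size_t last_mark_granularity = 100;
    size_t num_marks_without_final = 4;

    /// getTotalRows(): three full granules plus the short last one.
    size_t total_rows = constant_granularity * (num_marks_without_final - 1) + last_mark_granularity;
    assert(total_rows == 3 * 8192 + 100);

    /// getMarkUpperBoundForRow() inside the constant region is a plain division.
    size_t row_index = 20000;
    assert(row_index / constant_granularity == 2);
}
```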
47 src/Storages/MergeTree/MergeTreeIndexGranularityConstant.h Normal file
@ -0,0 +1,47 @@
#pragma once
#include <Storages/MergeTree/MergeTreeIndexGranularity.h>

namespace DB
{

/// Class that stores constant index granularity for whole part, except
/// last non-zero granule and final granule which always has zero rows.
class MergeTreeIndexGranularityConstant final : public MergeTreeIndexGranularity
{
private:
size_t constant_granularity;
size_t last_mark_granularity;

size_t num_marks_without_final = 0;
bool has_final_mark = false;

size_t getMarkUpperBoundForRow(size_t row_index) const;

public:
MergeTreeIndexGranularityConstant() = default;
explicit MergeTreeIndexGranularityConstant(size_t constant_granularity_);
MergeTreeIndexGranularityConstant(size_t constant_granularity_, size_t last_mark_granularity_, size_t num_marks_without_final_, bool has_final_mark_);

std::optional<size_t> getConstantGranularity() const override { return constant_granularity; }
size_t getRowsCountInRange(size_t begin, size_t end) const override;
size_t countMarksForRows(size_t from_mark, size_t number_of_rows) const override;
size_t countRowsForRows(size_t from_mark, size_t number_of_rows, size_t offset_in_rows) const override;

size_t getMarksCount() const override;
size_t getTotalRows() const override;

size_t getMarkRows(size_t mark_index) const override;
bool hasFinalMark() const override { return has_final_mark; }

void appendMark(size_t rows_count) override;
void adjustLastMark(size_t rows_count) override;

uint64_t getBytesSize() const override { return sizeof(size_t) * 3 + sizeof(bool); }
uint64_t getBytesAllocated() const override { return getBytesSize(); }

std::shared_ptr<MergeTreeIndexGranularity> optimize() override { return nullptr; }
std::string describe() const override;
};

}
@ -4,12 +4,12 @@
#include <base/types.h>
#include <Storages/MergeTree/MergeTreeDataPartType.h>
#include <Disks/IDisk.h>
#include <Storages/MergeTree/IDataPartStorage.h>

namespace DB
{

class MergeTreeData;
class IDataPartStorage;


/** Various types of mark files are stored in files with various extensions:
@ -151,7 +151,7 @@ UInt64 MergeTreeReadTask::estimateNumRows() const
return rows_to_read;

const auto & index_granularity = info->data_part->index_granularity;
return index_granularity.countRowsForRows(range_readers.main.currentMark(), rows_to_read, range_readers.main.numReadRowsInCurrentGranule());
return index_granularity->countRowsForRows(range_readers.main.currentMark(), rows_to_read, range_readers.main.numReadRowsInCurrentGranule());
}

MergeTreeReadTask::BlockAndProgress MergeTreeReadTask::read()
@ -230,7 +230,7 @@ try

if (!isCancelled() && current_row < data_part->rows_count)
{
size_t rows_to_read = data_part->index_granularity.getMarkRows(current_mark);
size_t rows_to_read = data_part->index_granularity->getMarkRows(current_mark);
bool continue_reading = (current_mark != 0);

const auto & sample = reader->getColumns();
@ -187,6 +187,8 @@ namespace ErrorCodes
DECLARE(UInt64, min_merge_bytes_to_use_direct_io, 10ULL * 1024 * 1024 * 1024, "Minimal amount of bytes to enable O_DIRECT in merge (0 - disabled).", 0) \
DECLARE(UInt64, index_granularity_bytes, 10 * 1024 * 1024, "Approximate amount of bytes in single granule (0 - disabled).", 0) \
DECLARE(UInt64, min_index_granularity_bytes, 1024, "Minimum amount of bytes in single granule.", 1024) \
DECLARE(Bool, use_const_adaptive_granularity, false, "Always use constant granularity for whole part. It allows to compress in memory values of index granularity. It can be useful in extremely large workloads with thin tables.", 0) \
DECLARE(Bool, enable_index_granularity_compression, true, "Compress in memory values of index granularity if it is possible", 0) \
DECLARE(Int64, merge_with_ttl_timeout, 3600 * 4, "Minimal time in seconds, when merge with delete TTL can be repeated.", 0) \
DECLARE(Int64, merge_with_recompression_ttl_timeout, 3600 * 4, "Minimal time in seconds, when merge with recompression TTL can be repeated.", 0) \
DECLARE(Bool, ttl_only_drop_parts, false, "Only drop altogether the expired parts and not partially prune them.", 0) \
@ -1,9 +1,9 @@
#include <Storages/MergeTree/MergedBlockOutputStream.h>
#include <Storages/MergeTree/MergeTreeSettings.h>
#include <IO/HashingWriteBuffer.h>
#include <Interpreters/Context.h>
#include <Interpreters/MergeTreeTransaction.h>
#include <Parsers/queryToString.h>
#include <Common/logger_useful.h>
#include <Core/Settings.h>


@ -15,6 +15,10 @@ namespace ErrorCodes
extern const int LOGICAL_ERROR;
}

namespace MergeTreeSetting
{
extern const MergeTreeSettingsBool enable_index_granularity_compression;
}

MergedBlockOutputStream::MergedBlockOutputStream(
const MergeTreeMutableDataPartPtr & data_part,

@ -23,12 +27,12 @@ MergedBlockOutputStream::MergedBlockOutputStream(
const MergeTreeIndices & skip_indices,
const ColumnsStatistics & statistics,
CompressionCodecPtr default_codec_,
MergeTreeIndexGranularityPtr index_granularity_ptr,
TransactionID tid,
bool reset_columns_,
bool save_marks_in_cache,
bool blocks_are_granules_size,
const WriteSettings & write_settings_,
const MergeTreeIndexGranularity & computed_index_granularity)
const WriteSettings & write_settings_)
: IMergedBlockOutputStream(data_part->storage.getSettings(), data_part->getDataPartStoragePtr(), metadata_snapshot_, columns_list_, reset_columns_)
, columns_list(columns_list_)
, default_codec(default_codec_)

@ -53,11 +57,22 @@ MergedBlockOutputStream::MergedBlockOutputStream(
data_part->storeVersionMetadata();

writer = createMergeTreeDataPartWriter(data_part->getType(),
data_part->name, data_part->storage.getLogName(), data_part->getSerializations(),
data_part_storage, data_part->index_granularity_info,
storage_settings,
columns_list, data_part->getColumnPositions(), metadata_snapshot, data_part->storage.getVirtualsPtr(),
skip_indices, statistics, data_part->getMarksFileExtension(), default_codec, writer_settings, computed_index_granularity);
data_part->name,
data_part->storage.getLogName(),
data_part->getSerializations(),
data_part_storage,
data_part->index_granularity_info,
storage_settings,
columns_list,
data_part->getColumnPositions(),
metadata_snapshot,
data_part->storage.getVirtualsPtr(),
skip_indices,
statistics,
data_part->getMarksFileExtension(),
default_codec,
writer_settings,
std::move(index_granularity_ptr));
}

/// If data is pre-sorted.

@ -233,10 +248,14 @@ MergedBlockOutputStream::Finalizer MergedBlockOutputStream::finalizePartAsync(
new_part->setBytesOnDisk(checksums.getTotalSizeOnDisk());
new_part->setBytesUncompressedOnDisk(checksums.getTotalSizeUncompressedOnDisk());
new_part->index_granularity = writer->getIndexGranularity();
/// Just in case
new_part->index_granularity.shrinkToFitInMemory();
new_part->calculateColumnsAndSecondaryIndicesSizesOnDisk(writer->getColumnsSample());

if ((*new_part->storage.getSettings())[MergeTreeSetting::enable_index_granularity_compression])
{
if (auto new_index_granularity = new_part->index_granularity->optimize())
new_part->index_granularity = std::move(new_index_granularity);
}

/// In mutation, existing_rows_count is already calculated in PartMergerWriter
/// In a merge, lightweight-deleted rows were physically deleted, so existing_rows_count equals rows_count
if (!new_part->existing_rows_count.has_value())
@ -22,12 +22,12 @@ public:
const MergeTreeIndices & skip_indices,
const ColumnsStatistics & statistics,
CompressionCodecPtr default_codec_,
MergeTreeIndexGranularityPtr index_granularity_ptr,
TransactionID tid,
bool reset_columns_ = false,
bool save_marks_in_cache = false,
bool blocks_are_granules_size = false,
const WriteSettings & write_settings = {},
const MergeTreeIndexGranularity & computed_index_granularity = {});
const WriteSettings & write_settings = {});

Block getHeader() const { return metadata_snapshot->getSampleBlock(); }
@ -15,25 +15,25 @@ MergedColumnOnlyOutputStream::MergedColumnOnlyOutputStream(
const MergeTreeMutableDataPartPtr & data_part,
const StorageMetadataPtr & metadata_snapshot_,
const NamesAndTypesList & columns_list_,
CompressionCodecPtr default_codec,
const MergeTreeIndices & indices_to_recalc,
const ColumnsStatistics & stats_to_recalc_,
WrittenOffsetColumns * offset_columns_,
bool save_marks_in_cache,
const MergeTreeIndexGranularity & index_granularity,
const MergeTreeIndexGranularityInfo * index_granularity_info)
const ColumnsStatistics & stats_to_recalc,
CompressionCodecPtr default_codec,
MergeTreeIndexGranularityPtr index_granularity_ptr,
WrittenOffsetColumns * offset_columns,
bool save_marks_in_cache)
: IMergedBlockOutputStream(data_part->storage.getSettings(), data_part->getDataPartStoragePtr(), metadata_snapshot_, columns_list_, /*reset_columns=*/ true)
{
const auto & global_settings = data_part->storage.getContext()->getSettingsRef();

/// Granularity is never recomputed while writing only columns.
MergeTreeWriterSettings writer_settings(
global_settings,
data_part->storage.getContext()->getWriteSettings(),
storage_settings,
index_granularity_info ? index_granularity_info->mark_type.adaptive : data_part->storage.canUseAdaptiveGranularity(),
/* rewrite_primary_key = */ false,
data_part->index_granularity_info.mark_type.adaptive,
/*rewrite_primary_key=*/ false,
save_marks_in_cache,
/* blocks_are_granules_size = */ false);
/*blocks_are_granules_size=*/ false);

writer = createMergeTreeDataPartWriter(
data_part->getType(),

@ -45,17 +45,17 @@ MergedColumnOnlyOutputStream::MergedColumnOnlyOutputStream(
metadata_snapshot_,
data_part->storage.getVirtualsPtr(),
indices_to_recalc,
stats_to_recalc_,
stats_to_recalc,
data_part->getMarksFileExtension(),
default_codec,
writer_settings,
index_granularity);
std::move(index_granularity_ptr));

auto * writer_on_disk = dynamic_cast<MergeTreeDataPartWriterOnDisk *>(writer.get());
if (!writer_on_disk)
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "MergedColumnOnlyOutputStream supports only parts stored on disk");

writer_on_disk->setWrittenOffsetColumns(offset_columns_);
writer_on_disk->setWrittenOffsetColumns(offset_columns);
}

void MergedColumnOnlyOutputStream::write(const Block & block)
@ -18,13 +18,12 @@ public:
const MergeTreeMutableDataPartPtr & data_part,
const StorageMetadataPtr & metadata_snapshot_,
const NamesAndTypesList & columns_list_,
CompressionCodecPtr default_codec_,
const MergeTreeIndices & indices_to_recalc_,
const ColumnsStatistics & stats_to_recalc_,
WrittenOffsetColumns * offset_columns_ = nullptr,
bool save_marks_in_cache = false,
const MergeTreeIndexGranularity & index_granularity = {},
const MergeTreeIndexGranularityInfo * index_granularity_info_ = nullptr);
const MergeTreeIndices & indices_to_recalc,
const ColumnsStatistics & stats_to_recalc,
CompressionCodecPtr default_codec,
MergeTreeIndexGranularityPtr index_granularity_ptr,
WrittenOffsetColumns * offset_columns = nullptr,
bool save_marks_in_cache = false);

void write(const Block & block) override;
@ -74,6 +74,7 @@ namespace MergeTreeSetting
extern const MergeTreeSettingsFloat ratio_of_defaults_for_sparse_serialization;
extern const MergeTreeSettingsBool replace_long_file_name_to_hash;
extern const MergeTreeSettingsBool ttl_only_drop_parts;
extern const MergeTreeSettingsBool enable_index_granularity_compression;
}

namespace ErrorCodes

@ -984,12 +985,16 @@ void finalizeMutatedPart(

new_data_part->rows_count = source_part->rows_count;
new_data_part->index_granularity = source_part->index_granularity;
/// Just in case
new_data_part->index_granularity.shrinkToFitInMemory();
new_data_part->setIndex(*source_part->getIndex());
new_data_part->minmax_idx = source_part->minmax_idx;
new_data_part->modification_time = time(nullptr);

if ((*new_data_part->storage.getSettings())[MergeTreeSetting::enable_index_granularity_compression])
{
if (auto new_index_granularity = new_data_part->index_granularity->optimize())
new_data_part->index_granularity = std::move(new_index_granularity);
}

/// Load rest projections which are hardlinked
bool noop;
new_data_part->loadProjections(false, false, noop, true /* if_not_loaded */);
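Merge finalization and mutation finalization now funnel through the same optimize-and-swap pattern guarded by `enable_index_granularity_compression`. A compilable toy analogue of that hook (illustrative names, not the real classes):

``` cpp
#include <cassert>
#include <memory>

struct Granularity
{
    virtual ~Granularity() = default;
    /// Returns a cheaper equivalent representation, or nullptr to keep the current one.
    virtual std::shared_ptr<Granularity> optimize() = 0;
};

struct Plain final : Granularity
{
    std::shared_ptr<Granularity> optimize() override { return nullptr; } /// nothing cheaper
};

int main()
{
    std::shared_ptr<Granularity> granularity = std::make_shared<Plain>();
    if (auto optimized = granularity->optimize())
        granularity = std::move(optimized);
    assert(granularity != nullptr); /// unchanged: optimize() declined
}
```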
@ -1607,7 +1612,6 @@ private:

ctx->minmax_idx = std::make_shared<IMergeTreeDataPart::MinMaxIndex>();

MergeTreeIndexGranularity computed_granularity;
bool has_delete = false;

for (auto & command_for_interpreter : ctx->for_interpreter)

@ -1620,9 +1624,21 @@ private:
}
}

MergeTreeIndexGranularityPtr index_granularity_ptr;
/// Reuse source part granularity if mutation does not change number of rows
if (!has_delete && ctx->execute_ttl_type == ExecuteTTLType::NONE)
computed_granularity = ctx->source_part->index_granularity;
{
index_granularity_ptr = ctx->source_part->index_granularity;
}
else
{
index_granularity_ptr = createMergeTreeIndexGranularity(
ctx->new_data_part->rows_count,
ctx->new_data_part->getBytesUncompressedOnDisk(),
*ctx->data->getSettings(),
ctx->new_data_part->index_granularity_info,
/*blocks_are_granules=*/ false);
}

ctx->out = std::make_shared<MergedBlockOutputStream>(
ctx->new_data_part,

@ -1631,12 +1647,12 @@ private:
skip_indices,
stats_to_rewrite,
ctx->compression_codec,
std::move(index_granularity_ptr),
ctx->txn ? ctx->txn->tid : Tx::PrehistoricTID,
/*reset_columns=*/ true,
/*save_marks_in_cache=*/ false,
/*blocks_are_granules_size=*/ false,
ctx->context->getWriteSettings(),
computed_granularity);
ctx->context->getWriteSettings());

ctx->mutating_pipeline = QueryPipelineBuilder::getPipeline(std::move(*builder));
ctx->mutating_pipeline.setProgressCallback(ctx->progress_callback);

@ -1866,14 +1882,10 @@ private:
ctx->new_data_part,
ctx->metadata_snapshot,
ctx->updated_header.getNamesAndTypesList(),
ctx->compression_codec,
std::vector<MergeTreeIndexPtr>(ctx->indices_to_recalc.begin(), ctx->indices_to_recalc.end()),
ColumnsStatistics(ctx->stats_to_recalc.begin(), ctx->stats_to_recalc.end()),
nullptr,
/*save_marks_in_cache=*/ false,
ctx->source_part->index_granularity,
&ctx->source_part->index_granularity_info
);
ctx->compression_codec,
ctx->source_part->index_granularity);

ctx->mutating_pipeline = QueryPipelineBuilder::getPipeline(std::move(*builder));
ctx->mutating_pipeline.setProgressCallback(ctx->progress_callback);
@ -99,7 +99,7 @@ size_t RangesInDataPart::getMarksCount() const

size_t RangesInDataPart::getRowsCount() const
{
return data_part->index_granularity.getRowsCountInRanges(ranges);
return data_part->index_granularity->getRowsCountInRanges(ranges);
}
@ -63,7 +63,7 @@ protected:
marks_loader = createMarksLoader(part, MergeTreeDataPartCompact::DATA_FILE_NAME, part->getColumns().size());

size_t num_columns = header.columns();
size_t num_rows = index_granularity.getMarksCount();
size_t num_rows = index_granularity->getMarksCount();

const auto & part_name_column = StorageMergeTreeIndex::part_name_column;
const auto & mark_number_column = StorageMergeTreeIndex::mark_number_column;

@ -115,7 +115,7 @@ protected:

data.resize(num_rows);
for (size_t i = 0; i < num_rows; ++i)
data[i] = index_granularity.getMarkRows(i);
data[i] = index_granularity->getMarkRows(i);

result_columns[pos] = std::move(column);
}

@ -159,7 +159,7 @@ private:
{
size_t col_idx = 0;
bool has_marks_in_part = false;
size_t num_rows = part->index_granularity.getMarksCount();
size_t num_rows = part->index_granularity->getMarksCount();

if (isWidePart(part))
{
@ -1,12 +1,15 @@
|
||||
#include <gtest/gtest.h>
|
||||
#include <Core/Block.h>
|
||||
#include <Columns/ColumnVector.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
|
||||
// I know that inclusion of .cpp is not good at all
|
||||
#include <Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp> // NOLINT
|
||||
#include <Storages/MergeTree/MergeTreeDataPartWriterWide.cpp> // NOLINT
|
||||
#include <Storages/MergeTree/MergeTreeIndexGranularityAdaptive.h>
|
||||
#include <Storages/MergeTree/MergeTreeIndexGranularityConstant.h>
|
||||
|
||||
using namespace DB;
|
||||
|
||||
static Block getBlockWithSize(size_t required_size_in_bytes, size_t size_of_row_in_bytes)
|
||||
{
|
||||
|
||||
@ -25,16 +28,16 @@ TEST(AdaptiveIndexGranularity, FillGranularityToyTests)
auto block1 = getBlockWithSize(80, 8);
EXPECT_EQ(block1.bytes(), 80);
{ /// Granularity bytes are not set. Take default index_granularity.
MergeTreeIndexGranularity index_granularity;
auto granularity = computeIndexGranularityImpl(block1, 0, 100, false, false);
MergeTreeIndexGranularityAdaptive index_granularity;
auto granularity = computeIndexGranularity(block1.rows(), block1.bytes(), 0, 100, false, false);
fillIndexGranularityImpl(index_granularity, 0, granularity, block1.rows());
EXPECT_EQ(index_granularity.getMarksCount(), 1);
EXPECT_EQ(index_granularity.getMarkRows(0), 100);
}

{ /// Granule size is less than block size. Block contains multiple granules.
MergeTreeIndexGranularity index_granularity;
auto granularity = computeIndexGranularityImpl(block1, 16, 100, false, true);
MergeTreeIndexGranularityAdaptive index_granularity;
auto granularity = computeIndexGranularity(block1.rows(), block1.bytes(), 16, 100, false, true);
fillIndexGranularityImpl(index_granularity, 0, granularity, block1.rows());
EXPECT_EQ(index_granularity.getMarksCount(), 5); /// First granule with 8 rows, and second with 1 row
for (size_t i = 0; i < index_granularity.getMarksCount(); ++i)
@ -43,8 +46,8 @@ TEST(AdaptiveIndexGranularity, FillGranularityToyTests)

{ /// Granule size is more than block size. Whole block (and maybe more) can be placed in single granule.

MergeTreeIndexGranularity index_granularity;
auto granularity = computeIndexGranularityImpl(block1, 512, 100, false, true);
MergeTreeIndexGranularityAdaptive index_granularity;
auto granularity = computeIndexGranularity(block1.rows(), block1.bytes(), 512, 100, false, true);
fillIndexGranularityImpl(index_granularity, 0, granularity, block1.rows());
EXPECT_EQ(index_granularity.getMarksCount(), 1);
for (size_t i = 0; i < index_granularity.getMarksCount(); ++i)
@ -53,8 +56,8 @@ TEST(AdaptiveIndexGranularity, FillGranularityToyTests)

{ /// Blocks with granule size

MergeTreeIndexGranularity index_granularity;
auto granularity = computeIndexGranularityImpl(block1, 1, 100, true, true);
MergeTreeIndexGranularityAdaptive index_granularity;
auto granularity = computeIndexGranularity(block1.rows(), block1.bytes(), 1, 100, true, true);
fillIndexGranularityImpl(index_granularity, 0, granularity, block1.rows());
EXPECT_EQ(index_granularity.getMarksCount(), 1);
for (size_t i = 0; i < index_granularity.getMarksCount(); ++i)
@ -62,8 +65,8 @@ TEST(AdaptiveIndexGranularity, FillGranularityToyTests)
}

{ /// Shift in index offset
MergeTreeIndexGranularity index_granularity;
auto granularity = computeIndexGranularityImpl(block1, 16, 100, false, true);
MergeTreeIndexGranularityAdaptive index_granularity;
auto granularity = computeIndexGranularity(block1.rows(), block1.bytes(), 16, 100, false, true);
fillIndexGranularityImpl(index_granularity, 6, granularity, block1.rows());
EXPECT_EQ(index_granularity.getMarksCount(), 2);
for (size_t i = 0; i < index_granularity.getMarksCount(); ++i)
@ -78,10 +81,10 @@ TEST(AdaptiveIndexGranularity, FillGranularitySequenceOfBlocks)
auto block1 = getBlockWithSize(65536, 8);
auto block2 = getBlockWithSize(65536, 8);
auto block3 = getBlockWithSize(65536, 8);
MergeTreeIndexGranularity index_granularity;
MergeTreeIndexGranularityAdaptive index_granularity;
for (const auto & block : {block1, block2, block3})
{
auto granularity = computeIndexGranularityImpl(block, 1024, 8192, false, true);
auto granularity = computeIndexGranularity(block.rows(), block.bytes(), 1024, 8192, false, true);
fillIndexGranularityImpl(index_granularity, 0, granularity, block.rows());
}
@ -94,10 +97,10 @@ TEST(AdaptiveIndexGranularity, FillGranularitySequenceOfBlocks)
auto block2 = getBlockWithSize(32768, 32);
auto block3 = getBlockWithSize(2048, 32);
EXPECT_EQ(block1.rows() + block2.rows() + block3.rows(), 3136);
MergeTreeIndexGranularity index_granularity;
MergeTreeIndexGranularityAdaptive index_granularity;
for (const auto & block : {block1, block2, block3})
{
auto granularity = computeIndexGranularityImpl(block, 1024, 8192, false, true);
auto granularity = computeIndexGranularity(block.rows(), block.bytes(), 1024, 8192, false, true);
fillIndexGranularityImpl(index_granularity, 0, granularity, block.rows());
}
@ -113,11 +116,11 @@ TEST(AdaptiveIndexGranularity, FillGranularitySequenceOfBlocks)

EXPECT_EQ(block1.rows() + block2.rows() + block3.rows(), (2048 + 4096 + 8192) / 32);

MergeTreeIndexGranularity index_granularity;
MergeTreeIndexGranularityAdaptive index_granularity;
size_t index_offset = 0;
for (const auto & block : {block1, block2, block3})
{
auto granularity = computeIndexGranularityImpl(block, 16384, 8192, false, true);
auto granularity = computeIndexGranularity(block.rows(), block.bytes(), 16384, 8192, false, true);
fillIndexGranularityImpl(index_granularity, index_offset, granularity, block.rows());
index_offset = index_granularity.getLastMarkRows() - block.rows();
}
@ -128,10 +131,10 @@ TEST(AdaptiveIndexGranularity, FillGranularitySequenceOfBlocks)

}

TEST(AdaptiveIndexGranularity, TestIndexGranularityClass)
TEST(AdaptiveIndexGranularity, TestIndexGranularityAdaptive)
{
{
MergeTreeIndexGranularity index_granularity;
MergeTreeIndexGranularityAdaptive index_granularity;
size_t sum_rows = 0;
size_t sum_marks = 0;
for (size_t i = 10; i <= 100; i+=10)
@ -148,11 +151,70 @@ TEST(AdaptiveIndexGranularity, TestIndexGranularityClass)
EXPECT_EQ(index_granularity.getMarkStartingRow(2), 30);
EXPECT_EQ(index_granularity.getMarkStartingRow(3), 60);

EXPECT_EQ(index_granularity.getRowsCountInRange({0, 10}), sum_rows);
EXPECT_EQ(index_granularity.getRowsCountInRange({0, 1}), 10);
EXPECT_EQ(index_granularity.getRowsCountInRange({2, 5}), 30 + 40 + 50);

EXPECT_EQ(index_granularity.getRowsCountInRange(0, 10), sum_rows);
EXPECT_EQ(index_granularity.getRowsCountInRange(0, 1), 10);
EXPECT_EQ(index_granularity.getRowsCountInRange(2, 5), 30 + 40 + 50);

EXPECT_EQ(index_granularity.getRowsCountInRanges({{2, 5}, {0, 1}, {0, 10}}), 10 + 30 + 40 + 50 + sum_rows);
}
}

TEST(AdaptiveIndexGranularity, TestIndexGranularityConstant)
{
auto test = [](MergeTreeIndexGranularity & index_granularity, size_t granularity_rows)
{
size_t sum_marks = 10;
size_t sum_rows = granularity_rows * sum_marks;

for (size_t i = 0; i < 10; ++i)
index_granularity.appendMark(granularity_rows);

size_t new_granularity_rows = granularity_rows / 2;
index_granularity.adjustLastMark(new_granularity_rows);
sum_rows -= (granularity_rows - new_granularity_rows);

index_granularity.appendMark(0);
++sum_marks;

EXPECT_EQ(index_granularity.getMarksCount(), sum_marks);
EXPECT_EQ(index_granularity.getMarksCountWithoutFinal(), sum_marks - 1);
EXPECT_EQ(index_granularity.hasFinalMark(), true);
EXPECT_EQ(index_granularity.getTotalRows(), sum_rows);
EXPECT_EQ(index_granularity.getTotalRows(), sum_rows);
EXPECT_EQ(index_granularity.getLastMarkRows(), 0);
EXPECT_EQ(index_granularity.getLastNonFinalMarkRows(), granularity_rows / 2);

EXPECT_EQ(index_granularity.getMarkStartingRow(0), 0);
EXPECT_EQ(index_granularity.getMarkStartingRow(3), 30);
EXPECT_EQ(index_granularity.getMarkStartingRow(9), 90);
EXPECT_EQ(index_granularity.getMarkStartingRow(10), sum_rows);
EXPECT_EQ(index_granularity.getMarkStartingRow(11), sum_rows);

EXPECT_EQ(index_granularity.getRowsCountInRange(0, 10), sum_rows);
EXPECT_EQ(index_granularity.getRowsCountInRange(0, 11), sum_rows);
EXPECT_EQ(index_granularity.getRowsCountInRange(0, 1), 10);
EXPECT_EQ(index_granularity.getRowsCountInRange(2, 5), 30);
EXPECT_EQ(index_granularity.getRowsCountInRange(3, 9), 60);
EXPECT_EQ(index_granularity.getRowsCountInRange(5, 10), 45);
EXPECT_EQ(index_granularity.getRowsCountInRange(5, 11), 45);

EXPECT_EQ(index_granularity.countMarksForRows(0, 35), 3);
EXPECT_EQ(index_granularity.countMarksForRows(5, 29), 2);
EXPECT_EQ(index_granularity.countMarksForRows(0, 89), 8);
EXPECT_EQ(index_granularity.countMarksForRows(0, 90), 9);
EXPECT_EQ(index_granularity.countMarksForRows(0, 92), 9);
EXPECT_EQ(index_granularity.countMarksForRows(0, 95), sum_marks);
EXPECT_EQ(index_granularity.countMarksForRows(0, 99), sum_marks);
};

const size_t granularity_rows = 10;

{
MergeTreeIndexGranularityConstant index_granularity(granularity_rows);
test(index_granularity, granularity_rows);
}
{
MergeTreeIndexGranularityAdaptive index_granularity;
test(index_granularity, granularity_rows);
}
}
@ -4,6 +4,7 @@

// I know that inclusion of .cpp is not good at all
#include <Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp> // NOLINT
#include <Storages/MergeTree/MergeTreeIndexGranularityAdaptive.h>

using namespace DB;
@ -13,7 +14,7 @@ TEST(IndexGranularityCompactParts, FillGranularitySequenceOfBlocks)
size_t rows = 8;
size_t granularity = 32;

MergeTreeIndexGranularity index_granularity;
MergeTreeIndexGranularityAdaptive index_granularity;
size_t index_offset = 0;
size_t rows_written = 0;
for (size_t i = 0; i < 3; ++i)
@ -34,7 +35,7 @@ TEST(IndexGranularityCompactParts, FillGranularitySequenceOfBlocks)
size_t rows2 = 8;
size_t granularity = 32;

MergeTreeIndexGranularity index_granularity;
MergeTreeIndexGranularityAdaptive index_granularity;
size_t index_offset = 0;

fillIndexGranularityImpl(index_granularity, index_offset, granularity, rows1);
@ -51,7 +52,7 @@ TEST(IndexGranularityCompactParts, FillGranularitySequenceOfBlocks)
size_t rows2 = 25;
size_t granularity = 32;

MergeTreeIndexGranularity index_granularity;
MergeTreeIndexGranularityAdaptive index_granularity;
size_t index_offset = 0;

fillIndexGranularityImpl(index_granularity, index_offset, granularity, rows1);
@ -68,7 +69,7 @@ TEST(IndexGranularityCompactParts, FillGranularitySequenceOfBlocks)
size_t rows = 40;
size_t granularity = 32;

MergeTreeIndexGranularity index_granularity;
MergeTreeIndexGranularityAdaptive index_granularity;
size_t index_offset = 0;

for (size_t i = 0; i < 3; ++i)
@ -981,6 +981,8 @@ class MergeTreeSettingsRandomizer:
"cache_populated_by_fetch": lambda: random.randint(0, 1),
"concurrent_part_removal_threshold": threshold_generator(0.2, 0.3, 0, 100),
"old_parts_lifetime": threshold_generator(0.2, 0.3, 10, 8 * 60),
"use_const_adaptive_granularity": lambda: random.randint(0, 1),
"enable_index_granularity_compression": lambda: random.randint(0, 1),
}

@staticmethod
@ -163,7 +163,6 @@ Filter column: notEquals(__table1.y, 2_UInt8)
> filter is pushed down before CreatingSets
CreatingSets
Filter
Filter
1
3
> one condition of filter is pushed down before LEFT JOIN
@ -8,3 +8,7 @@
1
1
1
1
1
1
1
@ -20,7 +20,9 @@ $CLICKHOUSE_CLIENT -q "
INSERT INTO t2 SELECT number, number FROM numbers_mt(1e6);
"

# list of query_id-s that are expected to be executed without preallocation
queries_without_preallocation=()
# list of query_id-s that are expected to be executed with preallocation
queries_with_preallocation=()

run_new_query() {
@ -51,6 +53,9 @@ $CLICKHOUSE_CLIENT "${opts[@]}" --query_id="$query_id" -q "SELECT * FROM t1 AS x
# now t1 is the right table
run_new_query "SELECT * FROM t2 AS x INNER JOIN t1 AS y ON x.a = y.a"

run_new_query "SELECT * FROM t1 AS x INNER JOIN t2 AS y ON x.a = y.a WHERE a < 200_000"
run_new_query "SELECT * FROM t1 AS x INNER JOIN t2 AS y ON x.a = y.a WHERE a >= 200_000"

##################################

$CLICKHOUSE_CLIENT -q "SYSTEM FLUSH LOGS"
@ -513,6 +513,7 @@ parseDateTime
parseDateTime32BestEffort
parseDateTime32BestEffortOrNull
parseDateTime32BestEffortOrZero
parseDateTime64
parseDateTime64BestEffort
parseDateTime64BestEffortOrNull
parseDateTime64BestEffortOrZero
@ -522,6 +523,8 @@ parseDateTime64BestEffortUSOrZero
parseDateTime64InJodaSyntax
parseDateTime64InJodaSyntaxOrNull
parseDateTime64InJodaSyntaxOrZero
parseDateTime64OrNull
parseDateTime64OrZero
parseDateTimeBestEffort
parseDateTimeBestEffortOrNull
parseDateTimeBestEffortOrZero
@ -332,13 +332,12 @@ SETTINGS optimize_aggregators_of_group_by_keys=0 -- avoid removing any() as it d
Expression (Projection)
Sorting (Sorting for ORDER BY)
Expression (Before ORDER BY)
Filter ((WHERE + (Projection + Before ORDER BY)))
Filter (HAVING)
Aggregating
Expression ((Before GROUP BY + Projection))
Sorting (Sorting for ORDER BY)
Expression ((Before ORDER BY + (Projection + Before ORDER BY)))
ReadFromSystemNumbers
Filter (((WHERE + (Projection + Before ORDER BY)) + HAVING))
Aggregating
Expression ((Before GROUP BY + Projection))
Sorting (Sorting for ORDER BY)
Expression ((Before ORDER BY + (Projection + Before ORDER BY)))
ReadFromSystemNumbers
-- execute
1
2
@ -28,21 +28,17 @@ WHERE type_1 = \'all\'
(Expression)
ExpressionTransform × 2
(Filter)
FilterTransform × 2
(Filter)
FilterTransform × 2
(Filter)
FilterTransform × 2
(Aggregating)
ExpressionTransform × 2
AggregatingTransform × 2
Copy 1 → 2
(Expression)
ExpressionTransform
(Expression)
ExpressionTransform
(ReadFromMergeTree)
MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) 0 → 1
FilterTransform × 6
(Aggregating)
ExpressionTransform × 2
AggregatingTransform × 2
Copy 1 → 2
(Expression)
ExpressionTransform
(Expression)
ExpressionTransform
(ReadFromMergeTree)
MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) 0 → 1
(Expression)
ExpressionTransform × 2
(Filter)
@ -68,14 +64,10 @@ ExpressionTransform × 2
ExpressionTransform × 2
AggregatingTransform × 2
Copy 1 → 2
(Filter)
FilterTransform
(Filter)
FilterTransform
(Expression)
ExpressionTransform
(ReadFromMergeTree)
MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) 0 → 1
(Expression)
ExpressionTransform
(ReadFromMergeTree)
MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) 0 → 1
(Expression)
ExpressionTransform × 2
(Aggregating)
@ -274,3 +274,40 @@ select parseDateTime('08 13, 2022, 07:58:32', '%c %e, %G, %k:%i:%s', 'UTC');
set session_timezone = 'UTC'; -- don't randomize the session timezone
select parseDateTime('2021-01-04 23:12:34') = toDateTime('2021-01-04 23:12:34');
1
-- The following is a test of parseDateTime64
select parseDateTime64(''); -- { serverError NOT_ENOUGH_SPACE }
select parseDateTime64('2177-10-09 10:30:10.123'); -- { serverError CANNOT_PARSE_DATETIME }
select parseDateTime64('2021-01-04 23:12:34.118112') = toDateTime64('2021-01-04 23:12:34.118112', 6);
1
select parseDateTime64('2021-01-04 23:12:34.118112', '%Y-%m-%d %H:%i:%s.%f') = toDateTime64('2021-01-04 23:12:34.118112', 6);
1
select parseDateTime64('2021-01-04 23:12:34.118'); -- { serverError NOT_ENOUGH_SPACE }
select parseDateTime64('2021-01-04 23:12:34.118', '%Y-%m-%d %H:%i:%s.%f'); -- { serverError NOT_ENOUGH_SPACE }
select parseDateTime64('2021-01-04 23:12:34.11811235', '%Y-%m-%d %H:%i:%s.%f'); -- { serverError CANNOT_PARSE_DATETIME }
select parseDateTime64('2021-01-04 23:12:34.118112', '%Y-%m-%d %H:%i:%s'); -- { serverError CANNOT_PARSE_DATETIME }
--leap years and non-leap years
select parseDateTime64('2024-02-29 11:23:34.123433', '%Y-%m-%d %H:%i:%s.%f') = toDateTime64('2024-02-29 11:23:34.123433', 6);
1
select parseDateTime64('2023-02-29 11:22:33.123433', '%Y-%m-%d %H:%i:%s.%f'); -- { serverError CANNOT_PARSE_DATETIME }
select parseDateTime64('2024-02-28 23:22:33.123433', '%Y-%m-%d %H:%i:%s.%f') = toDateTime64('2024-02-28 23:22:33.123433', 6);
1
select parseDateTime64('2023-02-28 23:22:33.123433', '%Y-%m-%d %H:%i:%s.%f') = toDateTime64('2023-02-28 23:22:33.123433', 6);
1
-- Test of parseDateTime64OrNull
select parseDateTime64OrNull('2021-01-04 23:12:34.118') IS NULL;
1
select parseDateTime64OrNull('2021-01-04 23:12:34.118', '%Y-%m-%d %H:%i:%s.%f') IS NULL;
1
select parseDateTime64OrNull('2021-01-04 23:12:34.118112', '%Y-%m-%d %H:%i:%s') IS NULL;
1
select parseDateTime64OrNull('2021-01-04 23:12:34.11811235', '%Y-%m-%d %H:%i:%s.%f') IS NULL;
1
-- Test of parseDateTime64OrZero
select parseDateTime64OrZero('2021-01-04 23:12:34.118') = toDateTime64('1970-01-01 00:00:00', 6);
1
select parseDateTime64OrZero('2021-01-04 23:12:34.118', '%Y-%m-%d %H:%i:%s.%f') = toDateTime64('1970-01-01 00:00:00', 6);
1
select parseDateTime64OrZero('2021-01-04 23:12:34.118112', '%Y-%m-%d %H:%i:%s') = toDateTime64('1970-01-01 00:00:00', 6);
1
select parseDateTime64OrZero('2021-01-04 23:12:34.11811235', '%Y-%m-%d %H:%i:%s.%f') = toDateTime64('1970-01-01 00:00:00', 6);
1
@ -191,5 +191,30 @@ select parseDateTime('08 13, 2022, 07:58:32', '%c %e, %G, %k:%i:%s', 'UTC');
set session_timezone = 'UTC'; -- don't randomize the session timezone
select parseDateTime('2021-01-04 23:12:34') = toDateTime('2021-01-04 23:12:34');

-- The following is a test of parseDateTime64
select parseDateTime64(''); -- { serverError NOT_ENOUGH_SPACE }
select parseDateTime64('2177-10-09 10:30:10.123'); -- { serverError CANNOT_PARSE_DATETIME }
select parseDateTime64('2021-01-04 23:12:34.118112') = toDateTime64('2021-01-04 23:12:34.118112', 6);
select parseDateTime64('2021-01-04 23:12:34.118112', '%Y-%m-%d %H:%i:%s.%f') = toDateTime64('2021-01-04 23:12:34.118112', 6);
select parseDateTime64('2021-01-04 23:12:34.118'); -- { serverError NOT_ENOUGH_SPACE }
select parseDateTime64('2021-01-04 23:12:34.118', '%Y-%m-%d %H:%i:%s.%f'); -- { serverError NOT_ENOUGH_SPACE }
select parseDateTime64('2021-01-04 23:12:34.11811235', '%Y-%m-%d %H:%i:%s.%f'); -- { serverError CANNOT_PARSE_DATETIME }
select parseDateTime64('2021-01-04 23:12:34.118112', '%Y-%m-%d %H:%i:%s'); -- { serverError CANNOT_PARSE_DATETIME }
--leap years and non-leap years
select parseDateTime64('2024-02-29 11:23:34.123433', '%Y-%m-%d %H:%i:%s.%f') = toDateTime64('2024-02-29 11:23:34.123433', 6);
select parseDateTime64('2023-02-29 11:22:33.123433', '%Y-%m-%d %H:%i:%s.%f'); -- { serverError CANNOT_PARSE_DATETIME }
select parseDateTime64('2024-02-28 23:22:33.123433', '%Y-%m-%d %H:%i:%s.%f') = toDateTime64('2024-02-28 23:22:33.123433', 6);
select parseDateTime64('2023-02-28 23:22:33.123433', '%Y-%m-%d %H:%i:%s.%f') = toDateTime64('2023-02-28 23:22:33.123433', 6);
-- Test of parseDateTime64OrNull
select parseDateTime64OrNull('2021-01-04 23:12:34.118') IS NULL;
select parseDateTime64OrNull('2021-01-04 23:12:34.118', '%Y-%m-%d %H:%i:%s.%f') IS NULL;
select parseDateTime64OrNull('2021-01-04 23:12:34.118112', '%Y-%m-%d %H:%i:%s') IS NULL;
select parseDateTime64OrNull('2021-01-04 23:12:34.11811235', '%Y-%m-%d %H:%i:%s.%f') IS NULL;
-- Test of parseDateTime64OrZero
select parseDateTime64OrZero('2021-01-04 23:12:34.118') = toDateTime64('1970-01-01 00:00:00', 6);
select parseDateTime64OrZero('2021-01-04 23:12:34.118', '%Y-%m-%d %H:%i:%s.%f') = toDateTime64('1970-01-01 00:00:00', 6);
select parseDateTime64OrZero('2021-01-04 23:12:34.118112', '%Y-%m-%d %H:%i:%s') = toDateTime64('1970-01-01 00:00:00', 6);
select parseDateTime64OrZero('2021-01-04 23:12:34.11811235', '%Y-%m-%d %H:%i:%s.%f') = toDateTime64('1970-01-01 00:00:00', 6);

-- { echoOff }
@ -360,3 +360,113 @@ select parseDateTimeInJodaSyntax('12 AM', 'h a', 'UTC', 'a fourth argument'); --
set session_timezone = 'UTC'; -- don't randomize the session timezone
select parseDateTimeInJodaSyntax('2021-01-04 23:12:34') = toDateTime('2021-01-04 23:12:34');
1
-- Test timezone and timezone offset for parseDateTimeInJodaSyntax
select parseDateTimeInJodaSyntax('2024-10-09 10:30:10-0812'); -- { serverError CANNOT_PARSE_DATETIME }
select parseDateTimeInJodaSyntax('2024-10-09 10:30:10-0812', 'yyyy-MM-dd HH:mm:ssZ') = toDateTime64('2024-10-09 18:42:10', 6);
1
select parseDateTimeInJodaSyntax('2024-10-09 10:30:10-08123', 'yyyy-MM-dd HH:mm:ssZZZ'); -- {serverError CANNOT_PARSE_DATETIME}
select parseDateTimeInJodaSyntax('2024-10-09 10:30:10EST', 'yyyy-MM-dd HH:mm:ssz') = toDateTime64('2024-10-09 15:30:10', 6);
1
select parseDateTimeInJodaSyntax('2024-10-09 10:30:10EST', 'yyyy-MM-dd HH:mm:sszzz') = toDateTime64('2024-10-09 15:30:10', 6);
1
-- incorrect timezone offset and timezone
select parseDateTimeInJodaSyntax('2024-10-09 10:30:10-8000', 'yyyy-MM-dd HH:mm:ssZ'); -- { serverError CANNOT_PARSE_DATETIME }
select parseDateTimeInJodaSyntax('2024-10-09 10:30:10ABCD', 'yyyy-MM-dd HH:mm:ssz'); -- { serverError BAD_ARGUMENTS }
-- The following is a test of parseDateTime64InJodaSyntax[OrNull/OrZero]
select parseDateTime64InJodaSyntax('', '') = toDateTime64('1970-01-01 00:00:00', 0);
1
select parseDateTime64InJodaSyntax('2177-10-09 10:30:10.123', 'yyyy-MM-dd HH:mm:ss.SSS'); -- { serverError CANNOT_PARSE_DATETIME }
select parseDateTime64InJodaSyntax('+0000', 'Z') = toDateTime64('1970-01-01 00:00:00', 0);
1
select parseDateTime64InJodaSyntax('08:01', 'HH:ss') = toDateTime64('1970-01-01 08:00:01', 0);
1
select parseDateTime64InJodaSyntax('2024-01-02', 'yyyy-MM-dd') = toDateTime64('2024-01-02 00:00:00', 0);
1
select parseDateTime64InJodaSyntax('10:30:50', 'HH:mm:ss') = toDateTime64('1970-01-01 10:30:50', 0);
1
select parseDateTime64InJodaSyntax('2024-12-31 23:30:10.123456-0800', 'yyyy-MM-dd HH:mm:ss.SSSSSSZ') = toDateTime64('2025-01-01 07:30:10.123456', 6);
1
select parseDateTime64InJodaSyntax('2024-01-01 00:00:01.123456+0800', 'yyyy-MM-dd HH:mm:ss.SSSSSSZ') = toDateTime64('2023-12-31 16:00:01.123456', 6);
1
select parseDateTime64InJodaSyntax('2021-01-04 23:12:34') = toDateTime64('2021-01-04 23:12:34', 0);
1
select parseDateTime64InJodaSyntax('2021-01-04 23:12:34.331', 'yyyy-MM-dd HH:mm:ss.SSS') = toDateTime64('2021-01-04 23:12:34.331', 3);
1
select parseDateTime64InJodaSyntax('2021/01/04 23:12:34.331', 'yyyy/MM/dd HH:mm:ss.SSS') = toDateTime64('2021-01-04 23:12:34.331', 3);
1
select parseDateTime64InJodaSyntax('2021-01-04 23:12:34.331'); -- { serverError CANNOT_PARSE_DATETIME }
select parseDateTime64InJodaSyntax('2021-01-04 23:12:34.331', 'yyyy-MM-dd HH:mm:ss.SSSS') = toDateTime64('2021-01-04 23:12:34.0331', 4);
1
select parseDateTime64InJodaSyntax('2021-01-04 23:12:34.331', 'yyyy-MM-dd HH:mm:ss.SS'); -- { serverError CANNOT_PARSE_DATETIME }
-- Test timezone and timezone offset for parseDateTime64InJodaSyntax
select parseDateTime64InJodaSyntax('2024-10-09 10:30:10-0812'); -- { serverError CANNOT_PARSE_DATETIME }
select parseDateTime64InJodaSyntax('2024-10-09 10:30:10.123456-0812', 'yyyy-MM-dd HH:mm:ss.SSSSSSZ') = toDateTime64('2024-10-09 18:42:10.123456', 6);
1
select parseDateTime64InJodaSyntax('2024-10-09 10:30:10.123456-08123', 'yyyy-MM-dd HH:mm:ss.SSSSSSZZZ'); -- {serverError CANNOT_PARSE_DATETIME}
select parseDateTime64InJodaSyntax('2024-10-09 10:30:10.123456EST', 'yyyy-MM-dd HH:mm:ss.SSSSSSz') = toDateTime64('2024-10-09 15:30:10.123456', 6);
1
select parseDateTime64InJodaSyntax('2024-10-09 10:30:10.123456EST', 'yyyy-MM-dd HH:mm:ss.SSSSSSzzz') = toDateTime64('2024-10-09 15:30:10.123456', 6);
1
select parseDateTime64InJodaSyntax('2024-11-05-0800 01:02:03.123456', 'yyyy-MM-ddZ HH:mm:ss.SSSSSS') = toDateTime64('2024-11-05 09:02:03.123456', 6);
1
select parseDateTime64InJodaSyntax('2024-10-09 10:30:10.123456America/Los_Angeles', 'yyyy-MM-dd HH:mm:ss.SSSSSSz') = toDateTime64('2024-10-09 17:30:10.123456', 6);
1
select parseDateTime64InJodaSyntax('2024-10-09 10:30:10.123456Australia/Adelaide', 'yyyy-MM-dd HH:mm:ss.SSSSSSz') = toDateTime64('2024-10-09 00:00:10.123456', 6);
1
select parseDateTime64InJodaSyntax('2024-10-09 10:30:10.123', 'yyyy-dd-MM HH:mm:ss.SSS') = toDateTime64('2024-09-10 10:30:10.123', 3);
1
select parseDateTime64InJodaSyntax('999999 10-09-202410:30:10', 'SSSSSSSSS dd-MM-yyyyHH:mm:ss'); -- {serverError CANNOT_PARSE_DATETIME }
select parseDateTime64InJodaSyntax('2024-10-09 10:30:10.123456-0845', 'yyyy-MM-dd HH:mm:ss.SSSSSSZ') = toDateTime64('2024-10-09 19:15:10.123456', 6);
1
-- incorrect timezone offset and timezone
select parseDateTime64InJodaSyntax('2024-10-09 10:30:10.123456-8000', 'yyyy-MM-dd HH:mm:ss.SSSSSSZ'); -- { serverError CANNOT_PARSE_DATETIME }
select parseDateTime64InJodaSyntax('2024-10-09 10:30:10.123456ABCD', 'yyyy-MM-dd HH:mm:ss.SSSSSSz'); -- { serverError BAD_ARGUMENTS }
select parseDateTime64InJodaSyntax('2023-02-29 11:22:33Not/Timezone', 'yyyy-MM-dd HH:mm:ssz'); -- { serverError BAD_ARGUMENTS }
--leap years and non-leap years
select parseDateTime64InJodaSyntax('2024-02-29 11:23:34America/Los_Angeles', 'yyyy-MM-dd HH:mm:ssz') = toDateTime64('2024-02-29 19:23:34', 0);
1
select parseDateTime64InJodaSyntax('2023-02-29 11:22:33America/Los_Angeles', 'yyyy-MM-dd HH:mm:ssz'); -- { serverError CANNOT_PARSE_DATETIME }
select parseDateTime64InJodaSyntax('2024-02-28 23:22:33America/Los_Angeles', 'yyyy-MM-dd HH:mm:ssz') = toDateTime64('2024-02-29 07:22:33', 0);
1
select parseDateTime64InJodaSyntax('2023-02-28 23:22:33America/Los_Angeles', 'yyyy-MM-dd HH:mm:ssz') = toDateTime64('2023-03-01 07:22:33', 0);
1
select parseDateTime64InJodaSyntax('2024-03-01 00:22:33-8000', 'yyyy-MM-dd HH:mm:ssZ'); -- { serverError CANNOT_PARSE_DATETIME }
select parseDateTime64InJodaSyntax('2023-03-01 00:22:33-8000', 'yyyy-MM-dd HH:mm:ssZ'); -- { serverError CANNOT_PARSE_DATETIME }
-- Test for parseDateTime64InJodaSyntaxOrNull
select parseDateTime64InJodaSyntaxOrNull('2024-10-09 10:30:10.123', 'yyyy-MM-dd HH:mm:ss.SSS') = toDateTime64('2024-10-09 10:30:10.123', 3);
1
select parseDateTime64InJodaSyntaxOrNull('2024-10-09 10:30:10.123456', 'yyyy-MM-dd HH:mm:ss.SSSSSS') = toDateTime64('2024-10-09 10:30:10.123456', 6);
1
select parseDateTime64InJodaSyntaxOrNull('2024-10-09 10:30:10.123456789', 'yyyy-MM-dd HH:mm:ss.SSSSSSSSS'); -- { serverError CANNOT_PARSE_DATETIME }
select parseDateTime64InJodaSyntaxOrNull('2024-10-09 10:30:10.123456-0800', 'yyyy-MM-dd HH:mm:ss.SSSSSSZ') = toDateTime64('2024-10-09 18:30:10.123456', 6);
1
select parseDateTime64InJodaSyntaxOrNull('2024-10-09 10:30:10.123456America/Los_Angeles', 'yyyy-MM-dd HH:mm:ss.SSSSSSz') = toDateTime64('2024-10-09 17:30:10.123456', 6);
1
select parseDateTime64InJodaSyntaxOrNull('2024-10-09 10:30:10.123', 'yyyy-dd-MM HH:mm:ss.SSS') = toDateTime64('2024-09-10 10:30:10.123', 3);
1
select parseDateTime64InJodaSyntaxOrNull('2023-02-29 11:22:33America/Los_Angeles', 'yyyy-MM-dd HH:mm:ssz') is NULL;
1
select parseDateTime64InJodaSyntaxOrNull('', '') = toDateTime64('1970-01-01 00:00:00', 0);
1
select parseDateTime64InJodaSyntaxOrNull('2177-10-09 10:30:10.123', 'yyyy-MM-dd HH:mm:ss.SSS') is NULL;
1
-- Test for parseDateTime64InJodaSyntaxOrZero
select parseDateTime64InJodaSyntaxOrZero('2024-10-09 10:30:10.123', 'yyyy-MM-dd HH:mm:ss.SSS') = toDateTime64('2024-10-09 10:30:10.123', 3);
1
select parseDateTime64InJodaSyntaxOrZero('2024-10-09 10:30:10.123456', 'yyyy-MM-dd HH:mm:ss.SSSSSS') = toDateTime64('2024-10-09 10:30:10.123456', 6);
1
select parseDateTime64InJodaSyntaxOrZero('2024-10-09 10:30:10.123456789', 'yyyy-MM-dd HH:mm:ss.SSSSSSSSS'); -- { serverError CANNOT_PARSE_DATETIME }
select parseDateTime64InJodaSyntaxOrZero('2024-10-09 10:30:10.123456-0800', 'yyyy-MM-dd HH:mm:ss.SSSSSSZ') = toDateTime64('2024-10-09 18:30:10.123456', 6);
1
select parseDateTime64InJodaSyntaxOrZero('2024-10-09 10:30:10.123456America/Los_Angeles', 'yyyy-MM-dd HH:mm:ss.SSSSSSz') = toDateTime64('2024-10-09 17:30:10.123456', 6);
1
select parseDateTime64InJodaSyntaxOrZero('2024-10-09 10:30:10.123', 'yyyy-dd-MM HH:mm:ss.SSS') = toDateTime64('2024-09-10 10:30:10.123', 3);
1
select parseDateTime64InJodaSyntaxOrZero('wrong value', 'yyyy-dd-MM HH:mm:ss.SSS') = toDateTime64('1970-01-01 00:00:00.000', 3);
1
select parseDateTime64InJodaSyntaxOrZero('2023-02-29 11:22:33America/Los_Angeles', 'yyyy-MM-dd HH:mm:ssz') = toDateTime64('1970-01-01 00:00:00', 0);
1
select parseDateTime64InJodaSyntaxOrZero('', '') = toDateTime64('1970-01-01 00:00:00', 0);
1
select parseDateTime64InJodaSyntaxOrZero('2177-10-09 10:30:10.123', 'yyyy-MM-dd HH:mm:ss.SSS') = toDateTime64('1970-01-01 00:00:00.000', 3);
1
@ -245,5 +245,74 @@ select parseDateTimeInJodaSyntax('12 AM', 'h a', 'UTC', 'a fourth argument'); --
-- The format string argument is optional
set session_timezone = 'UTC'; -- don't randomize the session timezone
select parseDateTimeInJodaSyntax('2021-01-04 23:12:34') = toDateTime('2021-01-04 23:12:34');
-- Test timezone and timezone offset for parseDateTimeInJodaSyntax
select parseDateTimeInJodaSyntax('2024-10-09 10:30:10-0812'); -- { serverError CANNOT_PARSE_DATETIME }
select parseDateTimeInJodaSyntax('2024-10-09 10:30:10-0812', 'yyyy-MM-dd HH:mm:ssZ') = toDateTime64('2024-10-09 18:42:10', 6);
select parseDateTimeInJodaSyntax('2024-10-09 10:30:10-08123', 'yyyy-MM-dd HH:mm:ssZZZ'); -- {serverError CANNOT_PARSE_DATETIME}
select parseDateTimeInJodaSyntax('2024-10-09 10:30:10EST', 'yyyy-MM-dd HH:mm:ssz') = toDateTime64('2024-10-09 15:30:10', 6);
select parseDateTimeInJodaSyntax('2024-10-09 10:30:10EST', 'yyyy-MM-dd HH:mm:sszzz') = toDateTime64('2024-10-09 15:30:10', 6);
-- incorrect timezone offset and timezone
select parseDateTimeInJodaSyntax('2024-10-09 10:30:10-8000', 'yyyy-MM-dd HH:mm:ssZ'); -- { serverError CANNOT_PARSE_DATETIME }
select parseDateTimeInJodaSyntax('2024-10-09 10:30:10ABCD', 'yyyy-MM-dd HH:mm:ssz'); -- { serverError BAD_ARGUMENTS }

-- The following is a test of parseDateTime64InJodaSyntax[OrNull/OrZero]
select parseDateTime64InJodaSyntax('', '') = toDateTime64('1970-01-01 00:00:00', 0);
select parseDateTime64InJodaSyntax('2177-10-09 10:30:10.123', 'yyyy-MM-dd HH:mm:ss.SSS'); -- { serverError CANNOT_PARSE_DATETIME }
select parseDateTime64InJodaSyntax('+0000', 'Z') = toDateTime64('1970-01-01 00:00:00', 0);
select parseDateTime64InJodaSyntax('08:01', 'HH:ss') = toDateTime64('1970-01-01 08:00:01', 0);
select parseDateTime64InJodaSyntax('2024-01-02', 'yyyy-MM-dd') = toDateTime64('2024-01-02 00:00:00', 0);
select parseDateTime64InJodaSyntax('10:30:50', 'HH:mm:ss') = toDateTime64('1970-01-01 10:30:50', 0);
select parseDateTime64InJodaSyntax('2024-12-31 23:30:10.123456-0800', 'yyyy-MM-dd HH:mm:ss.SSSSSSZ') = toDateTime64('2025-01-01 07:30:10.123456', 6);
select parseDateTime64InJodaSyntax('2024-01-01 00:00:01.123456+0800', 'yyyy-MM-dd HH:mm:ss.SSSSSSZ') = toDateTime64('2023-12-31 16:00:01.123456', 6);
select parseDateTime64InJodaSyntax('2021-01-04 23:12:34') = toDateTime64('2021-01-04 23:12:34', 0);
select parseDateTime64InJodaSyntax('2021-01-04 23:12:34.331', 'yyyy-MM-dd HH:mm:ss.SSS') = toDateTime64('2021-01-04 23:12:34.331', 3);
select parseDateTime64InJodaSyntax('2021/01/04 23:12:34.331', 'yyyy/MM/dd HH:mm:ss.SSS') = toDateTime64('2021-01-04 23:12:34.331', 3);
select parseDateTime64InJodaSyntax('2021-01-04 23:12:34.331'); -- { serverError CANNOT_PARSE_DATETIME }
select parseDateTime64InJodaSyntax('2021-01-04 23:12:34.331', 'yyyy-MM-dd HH:mm:ss.SSSS') = toDateTime64('2021-01-04 23:12:34.0331', 4);
select parseDateTime64InJodaSyntax('2021-01-04 23:12:34.331', 'yyyy-MM-dd HH:mm:ss.SS'); -- { serverError CANNOT_PARSE_DATETIME }
-- Test timezone and timezone offset for parseDateTime64InJodaSyntax
select parseDateTime64InJodaSyntax('2024-10-09 10:30:10-0812'); -- { serverError CANNOT_PARSE_DATETIME }
select parseDateTime64InJodaSyntax('2024-10-09 10:30:10.123456-0812', 'yyyy-MM-dd HH:mm:ss.SSSSSSZ') = toDateTime64('2024-10-09 18:42:10.123456', 6);
select parseDateTime64InJodaSyntax('2024-10-09 10:30:10.123456-08123', 'yyyy-MM-dd HH:mm:ss.SSSSSSZZZ'); -- {serverError CANNOT_PARSE_DATETIME}
select parseDateTime64InJodaSyntax('2024-10-09 10:30:10.123456EST', 'yyyy-MM-dd HH:mm:ss.SSSSSSz') = toDateTime64('2024-10-09 15:30:10.123456', 6);
select parseDateTime64InJodaSyntax('2024-10-09 10:30:10.123456EST', 'yyyy-MM-dd HH:mm:ss.SSSSSSzzz') = toDateTime64('2024-10-09 15:30:10.123456', 6);
select parseDateTime64InJodaSyntax('2024-11-05-0800 01:02:03.123456', 'yyyy-MM-ddZ HH:mm:ss.SSSSSS') = toDateTime64('2024-11-05 09:02:03.123456', 6);
select parseDateTime64InJodaSyntax('2024-10-09 10:30:10.123456America/Los_Angeles', 'yyyy-MM-dd HH:mm:ss.SSSSSSz') = toDateTime64('2024-10-09 17:30:10.123456', 6);
select parseDateTime64InJodaSyntax('2024-10-09 10:30:10.123456Australia/Adelaide', 'yyyy-MM-dd HH:mm:ss.SSSSSSz') = toDateTime64('2024-10-09 00:00:10.123456', 6);
select parseDateTime64InJodaSyntax('2024-10-09 10:30:10.123', 'yyyy-dd-MM HH:mm:ss.SSS') = toDateTime64('2024-09-10 10:30:10.123', 3);
select parseDateTime64InJodaSyntax('999999 10-09-202410:30:10', 'SSSSSSSSS dd-MM-yyyyHH:mm:ss'); -- {serverError CANNOT_PARSE_DATETIME }
select parseDateTime64InJodaSyntax('2024-10-09 10:30:10.123456-0845', 'yyyy-MM-dd HH:mm:ss.SSSSSSZ') = toDateTime64('2024-10-09 19:15:10.123456', 6);
-- incorrect timezone offset and timezone
select parseDateTime64InJodaSyntax('2024-10-09 10:30:10.123456-8000', 'yyyy-MM-dd HH:mm:ss.SSSSSSZ'); -- { serverError CANNOT_PARSE_DATETIME }
select parseDateTime64InJodaSyntax('2024-10-09 10:30:10.123456ABCD', 'yyyy-MM-dd HH:mm:ss.SSSSSSz'); -- { serverError BAD_ARGUMENTS }
select parseDateTime64InJodaSyntax('2023-02-29 11:22:33Not/Timezone', 'yyyy-MM-dd HH:mm:ssz'); -- { serverError BAD_ARGUMENTS }
--leap years and non-leap years
select parseDateTime64InJodaSyntax('2024-02-29 11:23:34America/Los_Angeles', 'yyyy-MM-dd HH:mm:ssz') = toDateTime64('2024-02-29 19:23:34', 0);
select parseDateTime64InJodaSyntax('2023-02-29 11:22:33America/Los_Angeles', 'yyyy-MM-dd HH:mm:ssz'); -- { serverError CANNOT_PARSE_DATETIME }
select parseDateTime64InJodaSyntax('2024-02-28 23:22:33America/Los_Angeles', 'yyyy-MM-dd HH:mm:ssz') = toDateTime64('2024-02-29 07:22:33', 0);
select parseDateTime64InJodaSyntax('2023-02-28 23:22:33America/Los_Angeles', 'yyyy-MM-dd HH:mm:ssz') = toDateTime64('2023-03-01 07:22:33', 0);
select parseDateTime64InJodaSyntax('2024-03-01 00:22:33-8000', 'yyyy-MM-dd HH:mm:ssZ'); -- { serverError CANNOT_PARSE_DATETIME }
select parseDateTime64InJodaSyntax('2023-03-01 00:22:33-8000', 'yyyy-MM-dd HH:mm:ssZ'); -- { serverError CANNOT_PARSE_DATETIME }
-- Test for parseDateTime64InJodaSyntaxOrNull
select parseDateTime64InJodaSyntaxOrNull('2024-10-09 10:30:10.123', 'yyyy-MM-dd HH:mm:ss.SSS') = toDateTime64('2024-10-09 10:30:10.123', 3);
select parseDateTime64InJodaSyntaxOrNull('2024-10-09 10:30:10.123456', 'yyyy-MM-dd HH:mm:ss.SSSSSS') = toDateTime64('2024-10-09 10:30:10.123456', 6);
select parseDateTime64InJodaSyntaxOrNull('2024-10-09 10:30:10.123456789', 'yyyy-MM-dd HH:mm:ss.SSSSSSSSS'); -- { serverError CANNOT_PARSE_DATETIME }
select parseDateTime64InJodaSyntaxOrNull('2024-10-09 10:30:10.123456-0800', 'yyyy-MM-dd HH:mm:ss.SSSSSSZ') = toDateTime64('2024-10-09 18:30:10.123456', 6);
select parseDateTime64InJodaSyntaxOrNull('2024-10-09 10:30:10.123456America/Los_Angeles', 'yyyy-MM-dd HH:mm:ss.SSSSSSz') = toDateTime64('2024-10-09 17:30:10.123456', 6);
select parseDateTime64InJodaSyntaxOrNull('2024-10-09 10:30:10.123', 'yyyy-dd-MM HH:mm:ss.SSS') = toDateTime64('2024-09-10 10:30:10.123', 3);
select parseDateTime64InJodaSyntaxOrNull('2023-02-29 11:22:33America/Los_Angeles', 'yyyy-MM-dd HH:mm:ssz') is NULL;
select parseDateTime64InJodaSyntaxOrNull('', '') = toDateTime64('1970-01-01 00:00:00', 0);
select parseDateTime64InJodaSyntaxOrNull('2177-10-09 10:30:10.123', 'yyyy-MM-dd HH:mm:ss.SSS') is NULL;
-- Test for parseDateTime64InJodaSyntaxOrZero
select parseDateTime64InJodaSyntaxOrZero('2024-10-09 10:30:10.123', 'yyyy-MM-dd HH:mm:ss.SSS') = toDateTime64('2024-10-09 10:30:10.123', 3);
select parseDateTime64InJodaSyntaxOrZero('2024-10-09 10:30:10.123456', 'yyyy-MM-dd HH:mm:ss.SSSSSS') = toDateTime64('2024-10-09 10:30:10.123456', 6);
select parseDateTime64InJodaSyntaxOrZero('2024-10-09 10:30:10.123456789', 'yyyy-MM-dd HH:mm:ss.SSSSSSSSS'); -- { serverError CANNOT_PARSE_DATETIME }
select parseDateTime64InJodaSyntaxOrZero('2024-10-09 10:30:10.123456-0800', 'yyyy-MM-dd HH:mm:ss.SSSSSSZ') = toDateTime64('2024-10-09 18:30:10.123456', 6);
select parseDateTime64InJodaSyntaxOrZero('2024-10-09 10:30:10.123456America/Los_Angeles', 'yyyy-MM-dd HH:mm:ss.SSSSSSz') = toDateTime64('2024-10-09 17:30:10.123456', 6);
select parseDateTime64InJodaSyntaxOrZero('2024-10-09 10:30:10.123', 'yyyy-dd-MM HH:mm:ss.SSS') = toDateTime64('2024-09-10 10:30:10.123', 3);
select parseDateTime64InJodaSyntaxOrZero('wrong value', 'yyyy-dd-MM HH:mm:ss.SSS') = toDateTime64('1970-01-01 00:00:00.000', 3);
select parseDateTime64InJodaSyntaxOrZero('2023-02-29 11:22:33America/Los_Angeles', 'yyyy-MM-dd HH:mm:ssz') = toDateTime64('1970-01-01 00:00:00', 0);
select parseDateTime64InJodaSyntaxOrZero('', '') = toDateTime64('1970-01-01 00:00:00', 0);
select parseDateTime64InJodaSyntaxOrZero('2177-10-09 10:30:10.123', 'yyyy-MM-dd HH:mm:ss.SSS') = toDateTime64('1970-01-01 00:00:00.000', 3);

-- { echoOff }
@ -1,44 +0,0 @@
parseDateTime64InJodaSyntax
2077-10-09 10:30:10.123
1970-01-01 08:00:00
1970-01-01 08:00:01
2024-01-02 00:00:00
1970-01-01 10:30:50
2025-01-01 15:30:10.123456
2024-01-01 00:00:01.123456
2024-10-09 10:30:10.123
2024-10-09 10:30:10.123456
2024-10-10 02:30:10.123456
2024-11-05 17:02:03.123456
2024-10-10 01:30:10.123456
2024-10-09 08:00:10.123456
2024-09-10 10:30:10.123
2024-09-10 10:30:10.000999999
2024-10-10 03:15:10.123456
2024-03-01 03:23:34
2024-02-29 15:22:33
2023-03-01 15:22:33
2024-03-04 16:22:33
2023-03-04 16:22:33
parseDateTime64InJodaSyntaxOrZero
2024-10-09 10:30:10.123
2024-10-09 10:30:10.123456
1970-01-01 08:00:00.000000000
2024-10-10 02:30:10.123456
2024-10-10 01:30:10.123456
2024-09-10 10:30:10.123
1970-01-01 08:00:00.000
1970-01-01 08:00:00
1970-01-01 08:00:00
1970-01-01 08:00:00.000
parseDateTime64InJodaSyntaxOrNull
2024-10-09 10:30:10.123
2024-10-09 10:30:10.123456
\N
2024-10-10 02:30:10.123456
2024-10-10 01:30:10.123456
2024-09-10 10:30:10.123
\N
\N
\N
1970-01-01 00:00:00
@ -1,54 +0,0 @@
set session_timezone = 'Asia/Shanghai';

select 'parseDateTime64InJodaSyntax';
select parseDateTime64InJodaSyntax('', ''); -- { serverError VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE }
select parseDateTime64InJodaSyntax('2077-10-09 10:30:10.123', 'yyyy-MM-dd HH:mm:ss.SSS');
select parseDateTime64InJodaSyntax('2177-10-09 10:30:10.123', 'yyyy-MM-dd HH:mm:ss.SSS'); -- { serverError CANNOT_PARSE_DATETIME }
select parseDateTime64InJodaSyntax('+0000', 'Z');
select parseDateTime64InJodaSyntax('08:01', 'HH:ss');
select parseDateTime64InJodaSyntax('2024-01-02', 'yyyy-MM-dd');
select parseDateTime64InJodaSyntax('10:30:50', 'HH:mm:ss');
select parseDateTime64InJodaSyntax('2024-12-31 23:30:10.123456-0800', 'yyyy-MM-dd HH:mm:ss.SSSSSSZ');
select parseDateTime64InJodaSyntax('2024-01-01 00:00:01.123456+0800', 'yyyy-MM-dd HH:mm:ss.SSSSSSZ');
select parseDateTime64InJodaSyntax('2024-10-09 10:30:10.123', 'yyyy-MM-dd HH:mm:ss.SSS');
select parseDateTime64InJodaSyntax('2024-10-09 10:30:10.123456', 'yyyy-MM-dd HH:mm:ss.SSSSSS');
select parseDateTime64InJodaSyntax('2024-10-09 10:30:10.123456789', 'yyyy-MM-dd HH:mm:ss.SSSSSSSSS'); -- { serverError CANNOT_PARSE_DATETIME }
select parseDateTime64InJodaSyntax('2024-10-09 10:30:10.123456-0800', 'yyyy-MM-dd HH:mm:ss.SSSSSSZ');
select parseDateTime64InJodaSyntax('2024-11-05-0800 01:02:03.123456', 'yyyy-MM-ddZ HH:mm:ss.SSSSSS');
select parseDateTime64InJodaSyntax('2024-10-09 10:30:10.123456America/Los_Angeles', 'yyyy-MM-dd HH:mm:ss.SSSSSSz');
select parseDateTime64InJodaSyntax('2024-10-09 10:30:10.123456Australia/Adelaide', 'yyyy-MM-dd HH:mm:ss.SSSSSSz');
select parseDateTime64InJodaSyntax('2024-10-09 10:30:10.123', 'yyyy-dd-MM HH:mm:ss.SSS');
select parseDateTime64InJodaSyntax('999999 10-09-202410:30:10', 'SSSSSSSSS dd-MM-yyyyHH:mm:ss');
select parseDateTime64InJodaSyntax('2024-10-09 10:30:10.123456-0845', 'yyyy-MM-dd HH:mm:ss.SSSSSSZ');
select parseDateTime64InJodaSyntax('2023-02-29 11:22:33Not/Timezone', 'yyyy-MM-dd HH:mm:ssz'); -- { serverError BAD_ARGUMENTS }
--leap years and non-leap years
select parseDateTime64InJodaSyntax('2024-02-29 11:23:34America/Los_Angeles', 'yyyy-MM-dd HH:mm:ssz');
select parseDateTime64InJodaSyntax('2023-02-29 11:22:33America/Los_Angeles', 'yyyy-MM-dd HH:mm:ssz'); -- { serverError CANNOT_PARSE_DATETIME }
select parseDateTime64InJodaSyntax('2024-02-28 23:22:33America/Los_Angeles', 'yyyy-MM-dd HH:mm:ssz');
select parseDateTime64InJodaSyntax('2023-02-28 23:22:33America/Los_Angeles', 'yyyy-MM-dd HH:mm:ssz');
select parseDateTime64InJodaSyntax('2024-03-01 00:22:33-8000', 'yyyy-MM-dd HH:mm:ssZ');
select parseDateTime64InJodaSyntax('2023-03-01 00:22:33-8000', 'yyyy-MM-dd HH:mm:ssZ');
select 'parseDateTime64InJodaSyntaxOrZero';
select parseDateTime64InJodaSyntaxOrZero('2024-10-09 10:30:10.123', 'yyyy-MM-dd HH:mm:ss.SSS');
select parseDateTime64InJodaSyntaxOrZero('2024-10-09 10:30:10.123456', 'yyyy-MM-dd HH:mm:ss.SSSSSS');
select parseDateTime64InJodaSyntaxOrZero('2024-10-09 10:30:10.123456789', 'yyyy-MM-dd HH:mm:ss.SSSSSSSSS');
select parseDateTime64InJodaSyntaxOrZero('2024-10-09 10:30:10.123456-0800', 'yyyy-MM-dd HH:mm:ss.SSSSSSZ');
select parseDateTime64InJodaSyntaxOrZero('2024-10-09 10:30:10.123456America/Los_Angeles', 'yyyy-MM-dd HH:mm:ss.SSSSSSz');
select parseDateTime64InJodaSyntaxOrZero('2024-10-09 10:30:10.123', 'yyyy-dd-MM HH:mm:ss.SSS');
select parseDateTime64InJodaSyntaxOrZero('wrong value', 'yyyy-dd-MM HH:mm:ss.SSS');
select parseDateTime64InJodaSyntaxOrZero('2023-02-29 11:22:33America/Los_Angeles', 'yyyy-MM-dd HH:mm:ssz');
select parseDateTime64InJodaSyntaxOrZero('', '');
select parseDateTime64InJodaSyntaxOrZero('2177-10-09 10:30:10.123', 'yyyy-MM-dd HH:mm:ss.SSS');
select 'parseDateTime64InJodaSyntaxOrNull';
select parseDateTime64InJodaSyntaxOrNull('2024-10-09 10:30:10.123', 'yyyy-MM-dd HH:mm:ss.SSS');
select parseDateTime64InJodaSyntaxOrNull('2024-10-09 10:30:10.123456', 'yyyy-MM-dd HH:mm:ss.SSSSSS');
select parseDateTime64InJodaSyntaxOrNull('2024-10-09 10:30:10.123456789', 'yyyy-MM-dd HH:mm:ss.SSSSSSSSS');
select parseDateTime64InJodaSyntaxOrNull('2024-10-09 10:30:10.123456-0800', 'yyyy-MM-dd HH:mm:ss.SSSSSSZ');
select parseDateTime64InJodaSyntaxOrNull('2024-10-09 10:30:10.123456America/Los_Angeles', 'yyyy-MM-dd HH:mm:ss.SSSSSSz');
select parseDateTime64InJodaSyntaxOrNull('2024-10-09 10:30:10.123', 'yyyy-dd-MM HH:mm:ss.SSS');
select parseDateTime64InJodaSyntaxOrNull('2023-02-29 11:22:33America/Los_Angeles', 'yyyy-MM-dd HH:mm:ssz');
select parseDateTime64InJodaSyntaxOrNull('', '');
select parseDateTime64InJodaSyntaxOrNull('2177-10-09 10:30:10.123', 'yyyy-MM-dd HH:mm:ss.SSS');

set session_timezone = 'UTC';
select parseDateTime64InJodaSyntax('', '');
@ -0,0 +1,54 @@
adaptive non-const, before merge
all_1_1_0 0 10 0
all_1_1_0 1 5 10
all_1_1_0 2 0 14
all_2_2_0 0 2 15
all_2_2_0 1 2 17
all_2_2_0 2 2 19
all_2_2_0 3 2 21
all_2_2_0 4 2 23
all_2_2_0 5 2 25
all_2_2_0 6 2 27
all_2_2_0 7 1 29
all_2_2_0 8 0 29
all_1_1_0 25
all_2_2_0 25
adaptive non-const, after merge
all_1_2_1 0 10 0
all_1_2_1 1 5 10
all_1_2_1 2 2 15
all_1_2_1 3 2 17
all_1_2_1 4 2 19
all_1_2_1 5 2 21
all_1_2_1 6 2 23
all_1_2_1 7 2 25
all_1_2_1 8 2 27
all_1_2_1 9 1 29
all_1_2_1 10 0 29
all_1_2_1 88
adaptive const, before merge
all_1_1_0 0 10 0
all_1_1_0 1 5 10
all_1_1_0 2 0 14
all_2_2_0 0 2 15
all_2_2_0 1 2 17
all_2_2_0 2 2 19
all_2_2_0 3 2 21
all_2_2_0 4 2 23
all_2_2_0 5 2 25
all_2_2_0 6 2 27
all_2_2_0 7 1 29
all_2_2_0 8 0 29
all_1_1_0 25
all_2_2_0 25
adaptive const, after merge
all_1_2_1 0 4 0
all_1_2_1 1 4 4
all_1_2_1 2 4 8
all_1_2_1 3 4 12
all_1_2_1 4 4 16
all_1_2_1 5 4 20
all_1_2_1 6 4 24
all_1_2_1 7 2 28
all_1_2_1 8 0 29
all_1_2_1 25
@ -0,0 +1,53 @@
DROP TABLE IF EXISTS t_index_granularity;

CREATE TABLE t_index_granularity (id UInt64, s String)
ENGINE = MergeTree ORDER BY id
SETTINGS min_bytes_for_wide_part = 0,
index_granularity = 10,
index_granularity_bytes = 4096,
merge_max_block_size = 10,
merge_max_block_size_bytes = 4096,
enable_index_granularity_compression = 1,
use_const_adaptive_granularity = 0,
enable_vertical_merge_algorithm = 0;

INSERT INTO t_index_granularity SELECT number, 'a' FROM numbers(15);
INSERT INTO t_index_granularity SELECT number, repeat('a', 2048) FROM numbers(15, 15);

SELECT 'adaptive non-const, before merge';
SELECT * FROM mergeTreeIndex(currentDatabase(), t_index_granularity) ORDER BY ALL;
SELECT name, index_granularity_bytes_in_memory FROM system.parts WHERE database = currentDatabase() AND table = 't_index_granularity' AND active;

OPTIMIZE TABLE t_index_granularity FINAL;

SELECT 'adaptive non-const, after merge';
SELECT * FROM mergeTreeIndex(currentDatabase(), t_index_granularity) ORDER BY ALL;
SELECT name, index_granularity_bytes_in_memory FROM system.parts WHERE database = currentDatabase() AND table = 't_index_granularity' AND active;

DROP TABLE t_index_granularity;

CREATE TABLE t_index_granularity (id UInt64, s String)
ENGINE = MergeTree ORDER BY id
SETTINGS min_bytes_for_wide_part = 0,
index_granularity = 10,
index_granularity_bytes = 4096,
merge_max_block_size = 10,
merge_max_block_size_bytes = 4096,
enable_index_granularity_compression = 1,
use_const_adaptive_granularity = 1,
enable_vertical_merge_algorithm = 0;

INSERT INTO t_index_granularity SELECT number, 'a' FROM numbers(15);
INSERT INTO t_index_granularity SELECT number, repeat('a', 2048) FROM numbers(15, 15);

SELECT 'adaptive const, before merge';
SELECT * FROM mergeTreeIndex(currentDatabase(), t_index_granularity) ORDER BY ALL;
SELECT name, index_granularity_bytes_in_memory FROM system.parts WHERE database = currentDatabase() AND table = 't_index_granularity' AND active;

OPTIMIZE TABLE t_index_granularity FINAL;

SELECT 'adaptive const, after merge';
SELECT * FROM mergeTreeIndex(currentDatabase(), t_index_granularity) ORDER BY ALL;
SELECT name, index_granularity_bytes_in_memory FROM system.parts WHERE database = currentDatabase() AND table = 't_index_granularity' AND active;

DROP TABLE t_index_granularity;
@ -1,2 +1,2 @@
Condition: (_CAST(toDate(ts)) in (-Inf, 1703980800])
Granules: 3/3
Condition: and((materialize(auid) in [1, 1]), (_CAST(toDate(ts)) in (-Inf, 1703980800]))
Granules: 1/3
@ -1 +1,2 @@
88 88
88 128
25 25
@ -8,8 +8,14 @@ CREATE TABLE t (
ENGINE MergeTree()
ORDER by key SETTINGS index_granularity = 10, index_granularity_bytes = '1024K';

ALTER TABLE t MODIFY SETTING enable_index_granularity_compression = 0;

INSERT INTO t SELECT number, toString(number) FROM numbers(100);

SELECT index_granularity_bytes_in_memory, index_granularity_bytes_in_memory_allocated FROM system.parts where table = 't' and database = currentDatabase();
ALTER TABLE t MODIFY SETTING enable_index_granularity_compression = 1;

INSERT INTO t SELECT number, toString(number) FROM numbers(100);

SELECT index_granularity_bytes_in_memory, index_granularity_bytes_in_memory_allocated FROM system.parts where table = 't' and database = currentDatabase() ORDER BY name;

DROP TABLE IF EXISTS t;
@ -0,0 +1 @@
4
@ -0,0 +1,7 @@
DROP TABLE IF EXISTS decimal_dt;

CREATE TABLE decimal_dt (timestamp DateTime64(9)) ENGINE=MergeTree() ORDER BY timestamp;
INSERT INTO decimal_dt VALUES (toDate('2024-11-11')),(toDate('2024-11-12')),(toDate('2024-11-13')),(toDate('2024-11-14')),(toDate('2024-11-15')),(toDate('2024-11-16')),(toDate('2024-11-17'));
SELECT count() FROM decimal_dt WHERE toDayOfWeek(timestamp) > 3;

DROP TABLE IF EXISTS decimal_dt;
@ -0,0 +1 @@
SELECT deltaSumTimestamp(1, 2); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }

tests/queries/0_stateless/03272_client_highlighting_bug.expect (Executable file)
@ -0,0 +1,39 @@
#!/usr/bin/expect -f

set basedir [file dirname $argv0]
set basename [file tail $argv0]
if {[info exists env(CLICKHOUSE_TMP)]} {
set CLICKHOUSE_TMP $env(CLICKHOUSE_TMP)
} else {
set CLICKHOUSE_TMP "."
}
exp_internal -f $CLICKHOUSE_TMP/$basename.debuglog 0
set history_file $CLICKHOUSE_TMP/$basename.history

log_user 0
set timeout 60
match_max 100000

expect_after {
# Do not ignore eof from expect
-i $any_spawn_id eof { exp_continue }
# A default timeout action is to do nothing, change it to fail
-i $any_spawn_id timeout { exit 1 }
}

spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion --enable-progress-table-toggle=0 --history_file=$history_file"

# (?n) - Do not match new lines
expect -re "(?n)ClickHouse client version \[\\d\]{2}\.\[\\d\]{1,2}\.\[\\d\]{1,2}\.\[\\d\]{1,}.*\r"
expect -re "(?n)Connecting to database .* at localhost:9000 as user default\.\r"
expect -re "(?n)Connected to ClickHouse server version \[\\d\]{2}\.\[\\d\]{1,2}\.\[\\d\]{1,2}\.\r"
expect ":) "

send -- "SELECT (+123) AS x"
expect {[47G[0;1mSELECT[0m (+[0;38;5;40m1[0m) [0;1mAS[0m [0;38;5;44mx[0m[J[63G}

send -- "\r"
expect ":) "

send -- ""
expect eof
@ -1 +1 @@
1 1 1
1 1 1 1
@ -1 +1,4 @@
SELECT primary_key_bytes_in_memory < 16000, primary_key_bytes_in_memory_allocated < 16000, primary_key_bytes_in_memory_allocated / primary_key_bytes_in_memory < 1.1 FROM system.parts WHERE database = 'test' AND table = 'hits';
-- Force PK load
SELECT CounterID FROM test.hits WHERE CounterID > 0 LIMIT 1 FORMAT Null;
-- Check PK size
SELECT primary_key_bytes_in_memory > 0, primary_key_bytes_in_memory < 16000, primary_key_bytes_in_memory_allocated < 16000, primary_key_bytes_in_memory_allocated / primary_key_bytes_in_memory < 1.1 FROM system.parts WHERE database = 'test' AND table = 'hits';
@ -1996,6 +1996,7 @@ jbod
jdbc
jemalloc
jeprof
joda
joinGet
joinGetOrNull
json