Add setting to fall back to V1 serialization for Dynamic and Object

This commit is contained in:
avogar 2024-11-12 15:18:00 +00:00
parent 69e4f93a2a
commit 013fde41e4
8 changed files with 23 additions and 8 deletions

View File

@ -1222,6 +1222,9 @@ Possible values: non-negative numbers. Note that if the value is too small or to
If true then data can be parsed directly to columns with custom serialization (e.g. Sparse) according to hints for serialization got from the table. If true then data can be parsed directly to columns with custom serialization (e.g. Sparse) according to hints for serialization got from the table.
)", 0) \ )", 0) \
\ \
DECLARE(Bool, merge_tree_use_v1_object_and_dynamic_serialization, false, R"(
When enabled, V1 serialization version of JSON and Dynamic types will be used in MergeTree instead of V2.
)", 0) \
DECLARE(UInt64, merge_tree_min_rows_for_concurrent_read, (20 * 8192), R"( DECLARE(UInt64, merge_tree_min_rows_for_concurrent_read, (20 * 8192), R"(
If the number of rows to be read from a file of a [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) table exceeds `merge_tree_min_rows_for_concurrent_read` then ClickHouse tries to perform a concurrent reading from this file on several threads. If the number of rows to be read from a file of a [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) table exceeds `merge_tree_min_rows_for_concurrent_read` then ClickHouse tries to perform a concurrent reading from this file on several threads.

View File

@ -77,6 +77,7 @@ static std::initializer_list<std::pair<ClickHouseVersion, SettingsChangesHistory
{"backup_restore_keeper_max_retries_while_handling_error", 0, 20, "New setting."}, {"backup_restore_keeper_max_retries_while_handling_error", 0, 20, "New setting."},
{"backup_restore_finish_timeout_after_error_sec", 0, 180, "New setting."}, {"backup_restore_finish_timeout_after_error_sec", 0, 180, "New setting."},
{"parallel_replicas_local_plan", false, true, "Use local plan for local replica in a query with parallel replicas"}, {"parallel_replicas_local_plan", false, true, "Use local plan for local replica in a query with parallel replicas"},
{"merge_tree_use_v1_object_and_dynamic_serialization", true, false, "Add new serialization V2 version for JSON and Dynamic types"}
} }
}, },
{"24.10", {"24.10",

View File

@ -115,12 +115,15 @@ void SerializationDynamic::serializeBinaryBulkStatePrefix(
dynamic_state->variant_names = variant_info.variant_names; dynamic_state->variant_names = variant_info.variant_names;
const auto & variant_column = column_dynamic.getVariantColumn(); const auto & variant_column = column_dynamic.getVariantColumn();
/// In V1 version write max_dynamic_types parameter.
if (structure_version == DynamicSerializationVersion::Value::V1)
writeVarUInt(column_dynamic.getMaxDynamicTypes(), *stream);
/// Write information about dynamic types. /// Write information about dynamic types.
dynamic_state->num_dynamic_types = dynamic_state->variant_names.size() - 1; /// -1 for SharedVariant dynamic_state->num_dynamic_types = dynamic_state->variant_names.size() - 1; /// -1 for SharedVariant
/// In the V1 version the max_dynamic_types parameter was written, but now only the actual number of variants is needed.
/// For compatibility we sometimes still need to write the V1 version, but we write the number of variants instead of
/// max_dynamic_types (because max_dynamic_types can now differ between serialized columns).
if (structure_version == DynamicSerializationVersion::Value::V1)
writeVarUInt(dynamic_state->num_dynamic_types, *stream);
writeVarUInt(dynamic_state->num_dynamic_types, *stream); writeVarUInt(dynamic_state->num_dynamic_types, *stream);
if (settings.data_types_binary_encoding) if (settings.data_types_binary_encoding)
{ {

View File

@ -201,15 +201,18 @@ void SerializationObject::serializeBinaryBulkStatePrefix(
return; return;
} }
/// In V1 version write max_dynamic_paths parameter.
if (serialization_version == ObjectSerializationVersion::Value::V1)
writeVarUInt(column_object.getMaxDynamicPaths(), *stream);
/// Write all dynamic paths in sorted order. /// Write all dynamic paths in sorted order.
object_state->sorted_dynamic_paths.reserve(dynamic_paths.size()); object_state->sorted_dynamic_paths.reserve(dynamic_paths.size());
for (const auto & [path, _] : dynamic_paths) for (const auto & [path, _] : dynamic_paths)
object_state->sorted_dynamic_paths.push_back(path); object_state->sorted_dynamic_paths.push_back(path);
std::sort(object_state->sorted_dynamic_paths.begin(), object_state->sorted_dynamic_paths.end()); std::sort(object_state->sorted_dynamic_paths.begin(), object_state->sorted_dynamic_paths.end());
/// In the V1 version the max_dynamic_paths parameter was written, but now only the actual number of dynamic paths is needed.
/// For compatibility we sometimes still need to write the V1 version, but we write the number of dynamic paths instead of
/// max_dynamic_paths (because max_dynamic_paths can now differ between serialized columns).
if (serialization_version == ObjectSerializationVersion::Value::V1)
writeVarUInt(object_state->sorted_dynamic_paths.size(), *stream);
writeVarUInt(object_state->sorted_dynamic_paths.size(), *stream); writeVarUInt(object_state->sorted_dynamic_paths.size(), *stream);
for (const auto & path : object_state->sorted_dynamic_paths) for (const auto & path : object_state->sorted_dynamic_paths)
writeStringBinary(path, *stream); writeStringBinary(path, *stream);

View File

@ -154,6 +154,7 @@ void writeColumnSingleGranule(
serialize_settings.position_independent_encoding = true; serialize_settings.position_independent_encoding = true;
serialize_settings.low_cardinality_max_dictionary_size = 0; serialize_settings.low_cardinality_max_dictionary_size = 0;
serialize_settings.use_compact_variant_discriminators_serialization = settings.use_compact_variant_discriminators_serialization; serialize_settings.use_compact_variant_discriminators_serialization = settings.use_compact_variant_discriminators_serialization;
serialize_settings.use_v1_object_and_dynamic_serialization = settings.use_v1_object_and_dynamic_serialization;
serialize_settings.object_and_dynamic_write_statistics = ISerialization::SerializeBinaryBulkSettings::ObjectAndDynamicStatisticsMode::PREFIX; serialize_settings.object_and_dynamic_write_statistics = ISerialization::SerializeBinaryBulkSettings::ObjectAndDynamicStatisticsMode::PREFIX;
serialization->serializeBinaryBulkStatePrefix(*column.column, serialize_settings, state); serialization->serializeBinaryBulkStatePrefix(*column.column, serialize_settings, state);

View File

@ -462,6 +462,7 @@ void MergeTreeDataPartWriterWide::writeColumn(
{ {
ISerialization::SerializeBinaryBulkSettings serialize_settings; ISerialization::SerializeBinaryBulkSettings serialize_settings;
serialize_settings.use_compact_variant_discriminators_serialization = settings.use_compact_variant_discriminators_serialization; serialize_settings.use_compact_variant_discriminators_serialization = settings.use_compact_variant_discriminators_serialization;
serialize_settings.use_v1_object_and_dynamic_serialization = settings.use_v1_object_and_dynamic_serialization;
serialize_settings.getter = createStreamGetter(name_and_type, offset_columns); serialize_settings.getter = createStreamGetter(name_and_type, offset_columns);
serialization->serializeBinaryBulkStatePrefix(column, serialize_settings, it->second); serialization->serializeBinaryBulkStatePrefix(column, serialize_settings, it->second);
} }

View File

@ -12,6 +12,7 @@ namespace Setting
extern const SettingsBool low_cardinality_use_single_dictionary_for_part; extern const SettingsBool low_cardinality_use_single_dictionary_for_part;
extern const SettingsUInt64 min_compress_block_size; extern const SettingsUInt64 min_compress_block_size;
extern const SettingsUInt64 max_compress_block_size; extern const SettingsUInt64 max_compress_block_size;
extern const SettingsBool merge_tree_use_v1_object_and_dynamic_serialization;
} }
namespace MergeTreeSetting namespace MergeTreeSetting
@ -53,6 +54,7 @@ MergeTreeWriterSettings::MergeTreeWriterSettings(
, low_cardinality_max_dictionary_size(global_settings[Setting::low_cardinality_max_dictionary_size]) , low_cardinality_max_dictionary_size(global_settings[Setting::low_cardinality_max_dictionary_size])
, low_cardinality_use_single_dictionary_for_part(global_settings[Setting::low_cardinality_use_single_dictionary_for_part] != 0) , low_cardinality_use_single_dictionary_for_part(global_settings[Setting::low_cardinality_use_single_dictionary_for_part] != 0)
, use_compact_variant_discriminators_serialization((*storage_settings)[MergeTreeSetting::use_compact_variant_discriminators_serialization]) , use_compact_variant_discriminators_serialization((*storage_settings)[MergeTreeSetting::use_compact_variant_discriminators_serialization])
, use_v1_object_and_dynamic_serialization(global_settings[Setting::merge_tree_use_v1_object_and_dynamic_serialization])
, use_adaptive_write_buffer_for_dynamic_subcolumns((*storage_settings)[MergeTreeSetting::use_adaptive_write_buffer_for_dynamic_subcolumns]) , use_adaptive_write_buffer_for_dynamic_subcolumns((*storage_settings)[MergeTreeSetting::use_adaptive_write_buffer_for_dynamic_subcolumns])
, adaptive_write_buffer_initial_size((*storage_settings)[MergeTreeSetting::adaptive_write_buffer_initial_size]) , adaptive_write_buffer_initial_size((*storage_settings)[MergeTreeSetting::adaptive_write_buffer_initial_size])
{ {

View File

@ -83,6 +83,7 @@ struct MergeTreeWriterSettings
size_t low_cardinality_max_dictionary_size; size_t low_cardinality_max_dictionary_size;
bool low_cardinality_use_single_dictionary_for_part; bool low_cardinality_use_single_dictionary_for_part;
bool use_compact_variant_discriminators_serialization; bool use_compact_variant_discriminators_serialization;
bool use_v1_object_and_dynamic_serialization;
bool use_adaptive_write_buffer_for_dynamic_subcolumns; bool use_adaptive_write_buffer_for_dynamic_subcolumns;
size_t adaptive_write_buffer_initial_size; size_t adaptive_write_buffer_initial_size;
}; };