Merge pull request #55201 from canhld94/column_level_compress_block

Column-level compression block sizes
This commit is contained in:
Robert Schulze 2024-01-23 11:22:03 +01:00 committed by GitHub
commit e67076ea2b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
16 changed files with 401 additions and 48 deletions

View File

@ -39,8 +39,8 @@ If you need to update rows frequently, we recommend using the [`ReplacingMergeTr
``` sql
CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
(
name1 [type1] [[NOT] NULL] [DEFAULT|MATERIALIZED|ALIAS|EPHEMERAL expr1] [COMMENT ...] [CODEC(codec1)] [STATISTIC(stat1)] [TTL expr1] [PRIMARY KEY],
name2 [type2] [[NOT] NULL] [DEFAULT|MATERIALIZED|ALIAS|EPHEMERAL expr2] [COMMENT ...] [CODEC(codec2)] [STATISTIC(stat2)] [TTL expr2] [PRIMARY KEY],
name1 [type1] [[NOT] NULL] [DEFAULT|MATERIALIZED|ALIAS|EPHEMERAL expr1] [COMMENT ...] [CODEC(codec1)] [STATISTIC(stat1)] [TTL expr1] [PRIMARY KEY] [SETTINGS (name = value, ...)],
name2 [type2] [[NOT] NULL] [DEFAULT|MATERIALIZED|ALIAS|EPHEMERAL expr2] [COMMENT ...] [CODEC(codec2)] [STATISTIC(stat2)] [TTL expr2] [PRIMARY KEY] [SETTINGS (name = value, ...)],
...
INDEX index_name1 expr1 TYPE type1(...) [GRANULARITY value1],
INDEX index_name2 expr2 TYPE type2(...) [GRANULARITY value2],
@ -56,7 +56,7 @@ ORDER BY expr
[DELETE|TO DISK 'xxx'|TO VOLUME 'xxx' [, ...] ]
[WHERE conditions]
[GROUP BY key_expr [SET v1 = aggr_func(v1) [, v2 = aggr_func(v2) ...]] ] ]
[SETTINGS name=value, ...]
[SETTINGS name = value, ...]
```
For a description of parameters, see the [CREATE query description](/docs/en/sql-reference/statements/create/table.md).
@ -620,7 +620,7 @@ The `TTL` clause cant be used for key columns.
#### Creating a table with `TTL`:
``` sql
CREATE TABLE example_table
CREATE TABLE tab
(
d DateTime,
a Int TTL d + INTERVAL 1 MONTH,
@ -635,7 +635,7 @@ ORDER BY d;
#### Adding TTL to a column of an existing table
``` sql
ALTER TABLE example_table
ALTER TABLE tab
MODIFY COLUMN
c String TTL d + INTERVAL 1 DAY;
```
@ -643,7 +643,7 @@ ALTER TABLE example_table
#### Altering TTL of the column
``` sql
ALTER TABLE example_table
ALTER TABLE tab
MODIFY COLUMN
c String TTL d + INTERVAL 1 MONTH;
```
@ -681,7 +681,7 @@ If a column is not part of the `GROUP BY` expression and is not set explicitly i
#### Creating a table with `TTL`:
``` sql
CREATE TABLE example_table
CREATE TABLE tab
(
d DateTime,
a Int
@ -697,7 +697,7 @@ TTL d + INTERVAL 1 MONTH DELETE,
#### Altering `TTL` of the table:
``` sql
ALTER TABLE example_table
ALTER TABLE tab
MODIFY TTL d + INTERVAL 1 DAY;
```
@ -1366,7 +1366,7 @@ In this sample configuration:
The statistic declaration is in the columns section of the `CREATE` query for tables from the `*MergeTree*` Family when we enable `set allow_experimental_statistic = 1`.
``` sql
CREATE TABLE example_table
CREATE TABLE tab
(
a Int64 STATISTIC(tdigest),
b Float64
@ -1378,8 +1378,8 @@ ORDER BY a
We can also manipulate statistics with `ALTER` statements.
```sql
ALTER TABLE example_table ADD STATISTIC b TYPE tdigest;
ALTER TABLE example_table DROP STATISTIC a TYPE tdigest;
ALTER TABLE tab ADD STATISTIC b TYPE tdigest;
ALTER TABLE tab DROP STATISTIC a TYPE tdigest;
```
These lightweight statistics aggregate information about distribution of values in columns.
@ -1390,3 +1390,42 @@ They can be used for query optimization when we enable `set allow_statistic_opti
- `tdigest`
Stores distribution of values from numeric columns in [TDigest](https://github.com/tdunning/t-digest) sketch.
## Column-level Settings {#column-level-settings}
Certain MergeTree settings can be overridden at the column level:
- `max_compress_block_size` — Maximum size of blocks of uncompressed data before compressing for writing to a table.
- `min_compress_block_size` — Minimum size of blocks of uncompressed data required for compression when writing the next mark.
Example:
```sql
CREATE TABLE tab
(
id Int64,
document String SETTINGS (min_compress_block_size = 16777216, max_compress_block_size = 16777216)
)
ENGINE = MergeTree
ORDER BY id
```
Column-level settings can be modified or removed using [ALTER MODIFY COLUMN](/docs/en/sql-reference/statements/alter/column.md), for example:
- Remove `SETTINGS` from column declaration:
```sql
ALTER TABLE tab MODIFY COLUMN document REMOVE SETTINGS;
```
- Modify a setting:
```sql
ALTER TABLE tab MODIFY COLUMN document MODIFY SETTING min_compress_block_size = 8192;
```
- Reset one or more settings. This also removes the setting declaration from the column expression in the table's CREATE query:
```sql
ALTER TABLE tab MODIFY COLUMN document RESET SETTING min_compress_block_size;
```

View File

@ -23,10 +23,11 @@ The following actions are supported:
- [RENAME COLUMN](#rename-column) — Renames an existing column.
- [CLEAR COLUMN](#clear-column) — Resets column values.
- [COMMENT COLUMN](#comment-column) — Adds a text comment to the column.
- [MODIFY COLUMN](#modify-column) — Changes columns type, default expression and TTL.
- [MODIFY COLUMN](#modify-column) — Changes columns type, default expression, TTL, and column settings.
- [MODIFY COLUMN REMOVE](#modify-column-remove) — Removes one of the column properties.
- [MODIFY COLUMN MODIFY SETTING](#modify-column-modify-setting) — Changes column settings.
- [MODIFY COLUMN RESET SETTING](#modify-column-reset-setting) — Resets column settings.
- [MATERIALIZE COLUMN](#materialize-column) — Materializes the column in the parts where the column is missing.
These actions are described in detail below.
## ADD COLUMN
@ -208,7 +209,7 @@ The `ALTER` query for changing columns is replicated. The instructions are saved
## MODIFY COLUMN REMOVE
Removes one of the column properties: `DEFAULT`, `ALIAS`, `MATERIALIZED`, `CODEC`, `COMMENT`, `TTL`.
Removes one of the column properties: `DEFAULT`, `ALIAS`, `MATERIALIZED`, `CODEC`, `COMMENT`, `TTL`, `SETTINGS`.
Syntax:
@ -228,6 +229,43 @@ ALTER TABLE table_with_ttl MODIFY COLUMN column_ttl REMOVE TTL;
- [REMOVE TTL](ttl.md).
## MODIFY COLUMN MODIFY SETTING
Modify a column setting.
Syntax:
```sql
ALTER TABLE table_name MODIFY COLUMN column_name MODIFY SETTING name=value,...;
```
**Example**
Modify column's `max_compress_block_size` to `1MB`:
```sql
ALTER TABLE table_name MODIFY COLUMN column_name MODIFY SETTING max_compress_block_size = 1048576;
```
## MODIFY COLUMN RESET SETTING
Resets a column setting. This also removes the setting declaration from the column expression in the table's CREATE query.
Syntax:
```sql
ALTER TABLE table_name MODIFY COLUMN column_name RESET SETTING name,...;
```
**Example**
Reset column setting `max_compress_block_size` to its default value:
```sql
ALTER TABLE table_name MODIFY COLUMN column_name RESET SETTING max_compress_block_size;
```
## MATERIALIZE COLUMN
Materializes or updates a column with an expression for a default value (`DEFAULT` or `MATERIALIZED`).

View File

@ -11,6 +11,8 @@
#include <Common/atomicRename.h>
#include <Common/PoolId.h>
#include <Common/logger_useful.h>
#include <Parsers/ASTSetQuery.h>
#include <Storages/MergeTree/MergeTreeSettings.h>
#include <base/hex.h>
#include <Core/Defines.h>
@ -463,6 +465,14 @@ ASTPtr InterpreterCreateQuery::formatColumns(const ColumnsDescription & columns)
column_declaration->children.push_back(column_declaration->ttl);
}
if (!column.settings.empty())
{
auto settings = std::make_shared<ASTSetQuery>();
settings->is_standalone = false;
settings->changes = column.settings;
column_declaration->settings = std::move(settings);
}
columns_list->children.push_back(column_declaration_ptr);
}
@ -670,6 +680,12 @@ ColumnsDescription InterpreterCreateQuery::getColumnsDescription(
if (col_decl.ttl)
column.ttl = col_decl.ttl;
if (col_decl.settings)
{
column.settings = col_decl.settings->as<ASTSetQuery &>().changes;
MergeTreeColumnSettings::validate(column.settings);
}
res.add(std::move(column));
}

View File

@ -57,70 +57,83 @@ ASTPtr ASTColumnDeclaration::clone() const
res->children.push_back(res->collation);
}
if (settings)
{
res->settings = settings->clone();
res->children.push_back(res->settings);
}
return res;
}
void ASTColumnDeclaration::formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const
void ASTColumnDeclaration::formatImpl(const FormatSettings & format_settings, FormatState & state, FormatStateStacked frame) const
{
frame.need_parens = false;
/// We have to always backquote column names to avoid ambiguity with INDEX and other declarations in CREATE query.
settings.ostr << backQuote(name);
format_settings.ostr << backQuote(name);
if (type)
{
settings.ostr << ' ';
format_settings.ostr << ' ';
FormatStateStacked type_frame = frame;
type_frame.indent = 0;
type->formatImpl(settings, state, type_frame);
type->formatImpl(format_settings, state, type_frame);
}
if (null_modifier)
{
settings.ostr << ' ' << (settings.hilite ? hilite_keyword : "")
<< (*null_modifier ? "" : "NOT ") << "NULL" << (settings.hilite ? hilite_none : "");
format_settings.ostr << ' ' << (format_settings.hilite ? hilite_keyword : "")
<< (*null_modifier ? "" : "NOT ") << "NULL" << (format_settings.hilite ? hilite_none : "");
}
if (default_expression)
{
settings.ostr << ' ' << (settings.hilite ? hilite_keyword : "") << default_specifier << (settings.hilite ? hilite_none : "");
format_settings.ostr << ' ' << (format_settings.hilite ? hilite_keyword : "") << default_specifier << (format_settings.hilite ? hilite_none : "");
if (!ephemeral_default)
{
settings.ostr << ' ';
default_expression->formatImpl(settings, state, frame);
format_settings.ostr << ' ';
default_expression->formatImpl(format_settings, state, frame);
}
}
if (comment)
{
settings.ostr << ' ' << (settings.hilite ? hilite_keyword : "") << "COMMENT" << (settings.hilite ? hilite_none : "") << ' ';
comment->formatImpl(settings, state, frame);
format_settings.ostr << ' ' << (format_settings.hilite ? hilite_keyword : "") << "COMMENT" << (format_settings.hilite ? hilite_none : "") << ' ';
comment->formatImpl(format_settings, state, frame);
}
if (codec)
{
settings.ostr << ' ';
codec->formatImpl(settings, state, frame);
format_settings.ostr << ' ';
codec->formatImpl(format_settings, state, frame);
}
if (stat_type)
{
settings.ostr << ' ';
stat_type->formatImpl(settings, state, frame);
format_settings.ostr << ' ';
stat_type->formatImpl(format_settings, state, frame);
}
if (ttl)
{
settings.ostr << ' ' << (settings.hilite ? hilite_keyword : "") << "TTL" << (settings.hilite ? hilite_none : "") << ' ';
ttl->formatImpl(settings, state, frame);
format_settings.ostr << ' ' << (format_settings.hilite ? hilite_keyword : "") << "TTL" << (format_settings.hilite ? hilite_none : "") << ' ';
ttl->formatImpl(format_settings, state, frame);
}
if (collation)
{
settings.ostr << ' ' << (settings.hilite ? hilite_keyword : "") << "COLLATE" << (settings.hilite ? hilite_none : "") << ' ';
collation->formatImpl(settings, state, frame);
format_settings.ostr << ' ' << (format_settings.hilite ? hilite_keyword : "") << "COLLATE" << (format_settings.hilite ? hilite_none : "") << ' ';
collation->formatImpl(format_settings, state, frame);
}
if (settings)
{
format_settings.ostr << ' ' << (format_settings.hilite ? hilite_keyword : "") << "SETTINGS" << (format_settings.hilite ? hilite_none : "") << ' ' << '(';
settings->formatImpl(format_settings, state, frame);
format_settings.ostr << ')';
}
}

View File

@ -22,12 +22,13 @@ public:
ASTPtr stat_type;
ASTPtr ttl;
ASTPtr collation;
ASTPtr settings;
bool primary_key_specifier = false;
String getID(char delim) const override { return "ColumnDeclaration" + (delim + name); }
ASTPtr clone() const override;
void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override;
void formatImpl(const FormatSettings & format_settings, FormatState & state, FormatStateStacked frame) const override;
};
}

View File

@ -111,6 +111,7 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected
ParserKeyword s_comment("COMMENT");
ParserKeyword s_codec("CODEC");
ParserKeyword s_ttl("TTL");
ParserKeyword s_settings("SETTINGS");
ParserKeyword s_remove_ttl("REMOVE TTL");
ParserKeyword s_remove_sample_by("REMOVE SAMPLE BY");
@ -725,9 +726,21 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected
command->remove_property = "CODEC";
else if (s_ttl.ignore(pos, expected))
command->remove_property = "TTL";
else if (s_settings.ignore(pos, expected))
command->remove_property = "SETTINGS";
else
return false;
}
else if (s_modify_setting.ignore(pos, expected))
{
if (!parser_settings.parse(pos, command_settings_changes, expected))
return false;
}
else if (s_reset_setting.ignore(pos, expected))
{
if (!parser_reset_setting.parse(pos, command_settings_resets, expected))
return false;
}
else
{
if (s_first.ignore(pos, expected))

View File

@ -10,6 +10,7 @@
#include <Parsers/ExpressionListParsers.h>
#include <Parsers/IParserBase.h>
#include <Parsers/ParserDataType.h>
#include <Parsers/ParserSetQuery.h>
#include <Poco/String.h>
namespace DB
@ -120,8 +121,6 @@ using ParserCompoundColumnDeclaration = IParserColumnDeclaration<ParserCompoundI
template <typename NameParser>
bool IParserColumnDeclaration<NameParser>::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
{
NameParser name_parser;
ParserDataType type_parser;
ParserKeyword s_default{"DEFAULT"};
ParserKeyword s_null{"NULL"};
ParserKeyword s_not{"NOT"};
@ -134,9 +133,15 @@ bool IParserColumnDeclaration<NameParser>::parseImpl(Pos & pos, ASTPtr & node, E
ParserKeyword s_stat{"STATISTIC"};
ParserKeyword s_ttl{"TTL"};
ParserKeyword s_remove{"REMOVE"};
ParserKeyword s_modify_setting("MODIFY SETTING");
ParserKeyword s_reset_setting("RESET SETTING");
ParserKeyword s_settings("SETTINGS");
ParserKeyword s_type{"TYPE"};
ParserKeyword s_collate{"COLLATE"};
ParserKeyword s_primary_key{"PRIMARY KEY"};
NameParser name_parser;
ParserDataType type_parser;
ParserExpression expr_parser;
ParserStringLiteral string_literal_parser;
ParserLiteral literal_parser;
@ -144,6 +149,7 @@ bool IParserColumnDeclaration<NameParser>::parseImpl(Pos & pos, ASTPtr & node, E
ParserCollation collation_parser;
ParserStatisticType stat_type_parser;
ParserExpression expression_parser;
ParserSetQuery settings_parser(true);
/// mandatory column name
ASTPtr name;
@ -155,11 +161,12 @@ bool IParserColumnDeclaration<NameParser>::parseImpl(Pos & pos, ASTPtr & node, E
/// This keyword may occur only in MODIFY COLUMN query. We check it here
/// because ParserDataType parses types as an arbitrary identifiers and
/// doesn't check that parsed string is existing data type. In this way
/// REMOVE keyword can be parsed as data type and further parsing will fail.
/// So we just check this keyword and in case of success return column
/// declaration with name only.
if (!require_type && s_remove.checkWithoutMoving(pos, expected))
/// doesn't check that parsed string is existing data type. In this way,
/// REMOVE, MODIFY SETTING, or RESET SETTING can be parsed as data type
/// and further parsing will fail. So we just check these keywords and in
/// case of success return column declaration with name only.
if (!require_type
&& (s_remove.checkWithoutMoving(pos, expected) || s_modify_setting.checkWithoutMoving(pos, expected) || s_reset_setting.checkWithoutMoving(pos, expected)))
{
if (!check_keywords_after_name)
return false;
@ -181,6 +188,7 @@ bool IParserColumnDeclaration<NameParser>::parseImpl(Pos & pos, ASTPtr & node, E
ASTPtr stat_type_expression;
ASTPtr ttl_expression;
ASTPtr collation_expression;
ASTPtr settings;
bool primary_key_specifier = false;
auto null_check_without_moving = [&]() -> bool
@ -321,6 +329,28 @@ bool IParserColumnDeclaration<NameParser>::parseImpl(Pos & pos, ASTPtr & node, E
primary_key_specifier = true;
}
auto old_pos = pos;
if (s_settings.ignore(pos, expected))
{
/// When the keyword `SETTINGS` appears here, it can be a column settings declaration or query settings.
/// For example:
/// - Column settings: `ALTER TABLE xx MODIFY COLUMN yy <new_type> SETTINGS (name = value)`
/// - Query settings: ` ALTER TABLE xx MODIFY COLUMN yy <new_type> SETTINGS mutation_sync = 2`
/// So after parsing keyword `SETTINGS`, we check if it's followed by an `(` then it's the column
/// settings, otherwise it's the query settings and we need to move `pos` back to origin position.
ParserToken parser_opening_bracket(TokenType::OpeningRoundBracket);
if (parser_opening_bracket.ignore(pos, expected))
{
if (!settings_parser.parse(pos, settings, expected))
return false;
ParserToken parser_closing_bracket(TokenType::ClosingRoundBracket);
if (!parser_closing_bracket.ignore(pos, expected))
return false;
}
else
pos = old_pos;
}
node = column_declaration;
if (type)
@ -351,6 +381,12 @@ bool IParserColumnDeclaration<NameParser>::parseImpl(Pos & pos, ASTPtr & node, E
column_declaration->children.push_back(std::move(codec_expression));
}
if (settings)
{
column_declaration->settings = settings;
column_declaration->children.push_back(std::move(settings));
}
if (stat_type_expression)
{
column_declaration->stat_type = stat_type_expression;
@ -362,6 +398,7 @@ bool IParserColumnDeclaration<NameParser>::parseImpl(Pos & pos, ASTPtr & node, E
column_declaration->ttl = ttl_expression;
column_declaration->children.push_back(std::move(ttl_expression));
}
if (collation_expression)
{
column_declaration->collation = collation_expression;

View File

@ -36,6 +36,7 @@
#include <Storages/MergeTree/MergeTreeData.h>
#include <Common/typeid_cast.h>
#include <Common/randomSeed.h>
#include <Storages/MergeTree/MergeTreeSettings.h>
#include <ranges>
@ -74,6 +75,8 @@ AlterCommand::RemoveProperty removePropertyFromString(const String & property)
return AlterCommand::RemoveProperty::CODEC;
else if (property == "TTL")
return AlterCommand::RemoveProperty::TTL;
else if (property == "SETTINGS")
return AlterCommand::RemoveProperty::SETTINGS;
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot remove unknown property '{}'", property);
}
@ -173,6 +176,25 @@ std::optional<AlterCommand> AlterCommand::parse(const ASTAlterCommand * command_
if (ast_col_decl.codec)
command.codec = ast_col_decl.codec;
if (ast_col_decl.settings)
command.settings_changes = ast_col_decl.settings->as<ASTSetQuery &>().changes;
/// At most only one of ast_col_decl.settings or command_ast->settings_changes is non-null
if (command_ast->settings_changes)
{
command.settings_changes = command_ast->settings_changes->as<ASTSetQuery &>().changes;
command.append_column_setting = true;
}
if (command_ast->settings_resets)
{
for (const ASTPtr & identifier_ast : command_ast->settings_resets->children)
{
const auto & identifier = identifier_ast->as<ASTIdentifier &>();
command.settings_resets.emplace(identifier.name());
}
}
if (command_ast->column)
command.after_column = getIdentifierName(command_ast->column);
@ -501,6 +523,10 @@ void AlterCommand::apply(StorageInMemoryMetadata & metadata, ContextPtr context)
{
column.ttl.reset();
}
else if (to_remove == RemoveProperty::SETTINGS)
{
column.settings.clear();
}
else
{
if (codec)
@ -515,6 +541,22 @@ void AlterCommand::apply(StorageInMemoryMetadata & metadata, ContextPtr context)
if (data_type)
column.type = data_type;
if (!settings_changes.empty())
{
MergeTreeColumnSettings::validate(settings_changes);
if (append_column_setting)
for (const auto & change : settings_changes)
column.settings.setSetting(change.name, change.value);
else
column.settings = settings_changes;
}
if (!settings_resets.empty())
{
for (const auto & setting : settings_resets)
column.settings.removeSetting(setting);
}
/// User specified default expression or changed
/// datatype. We have to replace default.
if (default_expression || data_type)
@ -1357,7 +1399,6 @@ void AlterCommands::validate(const StoragePtr & table, ContextPtr context) const
ErrorCodes::BAD_ARGUMENTS,
"Column {} doesn't have COMMENT, cannot remove it",
backQuote(column_name));
}
modified_columns.emplace(column_name);

View File

@ -64,7 +64,8 @@ struct AlterCommand
/// Other properties
COMMENT,
CODEC,
TTL
TTL,
SETTINGS
};
Type type = UNKNOWN;
@ -137,10 +138,10 @@ struct AlterCommand
/// For ADD and MODIFY
ASTPtr codec = nullptr;
/// For MODIFY SETTING
/// For MODIFY SETTING or MODIFY COLUMN MODIFY SETTING
SettingsChanges settings_changes;
/// For RESET SETTING
/// For RESET SETTING or MODIFY COLUMN RESET SETTING
std::set<String> settings_resets;
/// For MODIFY_QUERY
@ -155,6 +156,9 @@ struct AlterCommand
/// What to remove from column (or TTL)
RemoveProperty to_remove = RemoveProperty::NO_PROPERTY;
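/// True for MODIFY COLUMN MODIFY SETTING (the changes are applied on top of the column's existing settings); false for MODIFY COLUMN with a SETTINGS declaration (the column's settings are replaced).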
bool append_column_setting = false;
static std::optional<AlterCommand> parse(const ASTAlterCommand * command);
void apply(StorageInMemoryMetadata & metadata, ContextPtr context) const;

View File

@ -24,6 +24,7 @@
#include <Interpreters/Context.h>
#include <Storages/IStorage.h>
#include <Common/typeid_cast.h>
#include "Parsers/ASTSetQuery.h"
#include <Core/Defines.h>
#include <Compression/CompressionFactory.h>
#include <Interpreters/ExpressionAnalyzer.h>
@ -72,6 +73,7 @@ bool ColumnDescription::operator==(const ColumnDescription & other) const
&& default_desc == other.default_desc
&& stat == other.stat
&& ast_to_str(codec) == ast_to_str(other.codec)
&& settings == other.settings
&& ast_to_str(ttl) == ast_to_str(other.ttl);
}
@ -104,6 +106,18 @@ void ColumnDescription::writeText(WriteBuffer & buf) const
writeEscapedString(queryToString(codec), buf);
}
if (!settings.empty())
{
writeChar('\t', buf);
DB::writeText("SETTINGS ", buf);
DB::writeText("(", buf);
ASTSetQuery ast;
ast.is_standalone = false;
ast.changes = settings;
writeEscapedString(queryToString(ast), buf);
DB::writeText(")", buf);
}
if (stat)
{
writeChar('\t', buf);
@ -154,6 +168,9 @@ void ColumnDescription::readText(ReadBuffer & buf)
if (col_ast->ttl)
ttl = col_ast->ttl;
if (col_ast->settings)
settings = col_ast->settings->as<ASTSetQuery &>().changes;
}
else
throw Exception(ErrorCodes::CANNOT_PARSE_TEXT, "Cannot parse column description");

View File

@ -7,6 +7,7 @@
#include <Core/NamesAndAliases.h>
#include <Interpreters/Context_fwd.h>
#include <Storages/ColumnDefault.h>
#include <Common/SettingsChanges.h>
#include <Storages/StatisticsDescription.h>
#include <Common/Exception.h>
@ -83,6 +84,7 @@ struct ColumnDescription
ColumnDefault default_desc;
String comment;
ASTPtr codec;
SettingsChanges settings;
ASTPtr ttl;
std::optional<StatisticDescription> stat;

View File

@ -7,6 +7,7 @@
#include <Columns/ColumnSparse.h>
#include <Common/logger_useful.h>
#include <Storages/BlockNumberColumn.h>
#include <Storages/ColumnsDescription.h>
namespace DB
{
@ -143,13 +144,22 @@ void MergeTreeDataPartWriterWide::addStreams(
auto ast = parseQuery(codec_parser, "(" + Poco::toUpper(settings.marks_compression_codec) + ")", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH);
CompressionCodecPtr marks_compression_codec = CompressionCodecFactory::instance().get(ast, nullptr);
const auto column_desc = metadata_snapshot->columns.tryGetColumnDescription(GetColumnsOptions(GetColumnsOptions::AllPhysical), column.getNameInStorage());
UInt64 max_compress_block_size = 0;
if (column_desc)
if (const auto * value = column_desc->settings.tryGet("max_compress_block_size"))
max_compress_block_size = value->safeGet<UInt64>();
if (!max_compress_block_size)
max_compress_block_size = settings.max_compress_block_size;
column_streams[stream_name] = std::make_unique<Stream<false>>(
stream_name,
data_part->getDataPartStoragePtr(),
stream_name, DATA_FILE_EXTENSION,
stream_name, marks_file_extension,
compression_codec,
settings.max_compress_block_size,
max_compress_block_size,
marks_compression_codec,
settings.marks_compress_block_size,
settings.query_write_settings);
@ -323,6 +333,13 @@ StreamsWithMarks MergeTreeDataPartWriterWide::getCurrentMarksForColumn(
WrittenOffsetColumns & offset_columns)
{
StreamsWithMarks result;
const auto column_desc = metadata_snapshot->columns.tryGetColumnDescription(GetColumnsOptions(GetColumnsOptions::AllPhysical), column.getNameInStorage());
UInt64 min_compress_block_size = 0;
if (column_desc)
if (const auto * value = column_desc->settings.tryGet("min_compress_block_size"))
min_compress_block_size = value->safeGet<UInt64>();
if (!min_compress_block_size)
min_compress_block_size = settings.min_compress_block_size;
data_part->getSerialization(column.name)->enumerateStreams([&] (const ISerialization::SubstreamPath & substream_path)
{
bool is_offsets = !substream_path.empty() && substream_path.back().type == ISerialization::Substream::ArraySizes;
@ -335,7 +352,7 @@ StreamsWithMarks MergeTreeDataPartWriterWide::getCurrentMarksForColumn(
auto & stream = *column_streams[stream_name];
/// There could already be enough data to compress into the new block.
if (stream.compressed_hashing.offset() >= settings.min_compress_block_size)
if (stream.compressed_hashing.offset() >= min_compress_block_size)
stream.compressed_hashing.next();
StreamNameAndMark stream_with_mark;

View File

@ -213,6 +213,27 @@ void MergeTreeSettings::sanityCheck(size_t background_pool_tasks) const
}
}
void MergeTreeColumnSettings::validate(const SettingsChanges & changes)
{
static const MergeTreeSettings merge_tree_settings;
static const std::set<String> allowed_column_level_settings =
{
"min_compress_block_size",
"max_compress_block_size"
};
for (const auto & change : changes)
{
if (!allowed_column_level_settings.contains(change.name))
throw Exception(
ErrorCodes::UNKNOWN_SETTING,
"Setting {} is unknown or not supported at column level, supported settings: {}",
change.name,
fmt::join(allowed_column_level_settings, ", "));
merge_tree_settings.checkCanSet(change.name, change.value);
}
}
std::vector<String> MergeTreeSettings::getAllRegisteredNames() const
{

View File

@ -277,4 +277,11 @@ struct MergeTreeSettings : public BaseSettings<MergeTreeSettingsTraits>, public
using MergeTreeSettingsPtr = std::shared_ptr<const MergeTreeSettings>;
/// Column-level Merge-Tree settings which overwrite MergeTree settings
namespace MergeTreeColumnSettings
{
/// Checks a list of column-level setting changes.
/// Throws an exception with code UNKNOWN_SETTING if a change names a setting
/// that is not supported at column level.
void validate(const SettingsChanges & changes);
}
}

View File

@ -0,0 +1,18 @@
CREATE TABLE default.tab\n(\n `id` UInt64,\n `long_string` String SETTINGS (min_compress_block_size = 163840, max_compress_block_size = 163840),\n `v1` String,\n `v2` UInt64,\n `v3` Float32,\n `v4` Float64\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/default/tab/2870\', \'r1\')\nORDER BY id\nSETTINGS min_bytes_for_wide_part = 1, index_granularity = 8192
1000
CREATE TABLE default.tab\n(\n `id` UInt64,\n `long_string` String SETTINGS (min_compress_block_size = 8192, max_compress_block_size = 163840),\n `v1` String,\n `v2` UInt64,\n `v3` Float32,\n `v4` Float64\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/default/tab/2870\', \'r1\')\nORDER BY id\nSETTINGS min_bytes_for_wide_part = 1, index_granularity = 8192
CREATE TABLE default.tab\n(\n `id` UInt64,\n `long_string` String SETTINGS (max_compress_block_size = 163840),\n `v1` String,\n `v2` UInt64,\n `v3` Float32,\n `v4` Float64\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/default/tab/2870\', \'r1\')\nORDER BY id\nSETTINGS min_bytes_for_wide_part = 1, index_granularity = 8192
CREATE TABLE default.tab\n(\n `id` UInt64,\n `long_string` String,\n `v1` String,\n `v2` UInt64,\n `v3` Float32,\n `v4` Float64\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/default/tab/2870\', \'r1\')\nORDER BY id\nSETTINGS min_bytes_for_wide_part = 1, index_granularity = 8192
CREATE TABLE default.tab\n(\n `id` UInt64,\n `long_string` String SETTINGS (min_compress_block_size = 163840, max_compress_block_size = 163840),\n `v1` String,\n `v2` UInt64,\n `v3` Float32,\n `v4` Float64\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/default/tab/2870\', \'r1\')\nORDER BY id\nSETTINGS min_bytes_for_wide_part = 1, index_granularity = 8192
---
(0,0) 0
(1,1) 1
(2,2) 2
(3,3) 3
(4,4) 4
(5,5) 5
(6,6) 6
(7,7) 7
(8,8) 8
(9,9) 9
---

View File

@ -0,0 +1,69 @@
-- Tags: no-random-merge-tree-settings, no-replicated-database
-- Tag no-replicated-database: Old syntax is not allowed
-- The test uses a replicated table to check that a column declaration with settings is serialized to and deserialized from ZooKeeper
-- Tests column-level settings for MergeTree* tables
DROP TABLE IF EXISTS tab;
-- Part 1: declare column-level settings in CREATE TABLE, then change them with ALTER.
-- Each SHOW CREATE prints the current column declaration so that the effect of the preceding statement is visible.
CREATE TABLE tab
(
id UInt64,
long_string String SETTINGS (min_compress_block_size = 163840, max_compress_block_size = 163840),
v1 String,
v2 UInt64,
v3 Float32,
v4 Float64
)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/tab/2870', 'r1')
ORDER BY id
SETTINGS min_bytes_for_wide_part = 1;
SHOW CREATE tab;
-- Write and count some rows to check that the table is usable with column-level settings.
INSERT INTO TABLE tab SELECT number, randomPrintableASCII(1000), randomPrintableASCII(10), rand(number), rand(number+1), rand(number+2) FROM numbers(1000);
SELECT count() FROM tab;
-- MODIFY SETTING changes one setting; max_compress_block_size is expected to stay as declared.
ALTER TABLE tab MODIFY COLUMN long_string MODIFY SETTING min_compress_block_size = 8192;
SHOW CREATE tab;
-- RESET SETTING drops only the named setting from the column declaration.
ALTER TABLE tab MODIFY COLUMN long_string RESET SETTING min_compress_block_size;
SHOW CREATE tab;
-- REMOVE SETTINGS drops the whole SETTINGS clause from the column declaration.
ALTER TABLE tab MODIFY COLUMN long_string REMOVE SETTINGS;
SHOW CREATE tab;
-- MODIFY COLUMN with a full declaration sets the column settings again.
ALTER TABLE tab MODIFY COLUMN long_string String SETTINGS (min_compress_block_size = 163840, max_compress_block_size = 163840);
SHOW CREATE tab;
DROP TABLE tab;
SELECT '--- ';
-- Part 2: column-level settings on Tuple and JSON columns.
SET allow_experimental_object_type = 1;
CREATE TABLE tab
(
id UInt64,
tup Tuple(UInt64, UInt64) SETTINGS (min_compress_block_size = 81920, max_compress_block_size = 163840),
json JSON SETTINGS (min_compress_block_size = 81920, max_compress_block_size = 163840),
)
ENGINE = MergeTree
ORDER BY id
SETTINGS min_bytes_for_wide_part = 1;
INSERT INTO TABLE tab SELECT number, tuple(number, number), concat('{"key": ', toString(number), ' ,"value": ', toString(rand(number+1)), '}') FROM numbers(1000);
-- ORDER BY key makes the 10 printed rows deterministic.
SELECT tup, json.key AS key FROM tab ORDER BY key LIMIT 10;
DROP TABLE tab;
SELECT '--- ';
-- Unsupported column-level settings are rejected
-- (min_block_size is not one of the settings accepted at column level, so CREATE must fail).
CREATE TABLE tab
(
id UInt64,
long_string String SETTINGS (min_block_size = 81920, max_compress_block_size = 163840),
)
ENGINE = MergeTree
ORDER BY id
SETTINGS min_bytes_for_wide_part = 1; -- {serverError UNKNOWN_SETTING}