Merge pull request #62601 from vitlibar/avoid-evaluating-table-defaults-while-restoring

Avoid evaluating table DEFAULT expressions while executing RESTORE
This commit is contained in:
Vitaly Baranov 2024-04-15 15:32:42 +00:00 committed by GitHub
commit 4efa03717b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 105 additions and 5 deletions

View File

@ -235,6 +235,7 @@ void DatabasesOverlay::createTableRestoredFromBackup(
/// Creates a table by executing a "CREATE TABLE" query.
InterpreterCreateQuery interpreter{create_table_query, local_context};
interpreter.setInternal(true);
interpreter.setIsRestoreFromBackup(true);
interpreter.execute();
}

View File

@ -523,6 +523,7 @@ ColumnsDescription InterpreterCreateQuery::getColumnsDescription(
ASTPtr default_expr_list = std::make_shared<ASTExpressionList>();
NamesAndTypesList column_names_and_types;
bool make_columns_nullable = mode <= LoadingStrictnessLevel::CREATE && context_->getSettingsRef().data_type_default_nullable;
bool has_columns_with_default_without_type = false;
for (const auto & ast : columns_ast.children)
{
@ -597,14 +598,22 @@ ColumnsDescription InterpreterCreateQuery::getColumnsDescription(
setAlias(col_decl.default_expression->clone(), tmp_column_name));
}
else
{
has_columns_with_default_without_type = true;
default_expr_list->children.emplace_back(setAlias(col_decl.default_expression->clone(), col_decl.name));
}
}
}
Block defaults_sample_block;
/// set missing types and wrap default_expression's in a conversion-function if necessary
if (!default_expr_list->children.empty())
/// Set missing types and wrap default_expression's in a conversion-function if necessary.
/// We try to avoid that validation while restoring from a backup because it might be slow or troublesome
/// (for example, a default expression can contain dictGet() and that dictionary can access remote servers or
/// require different users to authenticate).
if (!default_expr_list->children.empty() && (has_columns_with_default_without_type || (mode <= LoadingStrictnessLevel::CREATE)))
{
defaults_sample_block = validateColumnsDefaultsAndGetSampleBlock(default_expr_list, column_names_and_types, context_);
}
bool skip_checks = LoadingStrictnessLevel::SECONDARY_CREATE <= mode;
bool sanity_check_compression_codecs = !skip_checks && !context_->getSettingsRef().allow_suspicious_codecs;

View File

@ -1279,6 +1279,93 @@ def test_projection():
)
def test_restore_table_not_evaluate_table_defaults():
    """Check that RESTORE does not evaluate DEFAULT expressions of the restored table.

    `test.tbl` has a column whose DEFAULT calls dictGet() on `test.dict`, and that
    dictionary reads `test.src` as user `u1`. After restoring the database as `test2`
    the dictionary must still be NOT_LOADED; only a later INSERT (which has to compute
    the default) triggers loading it.
    """
    dict_status_query = "SELECT status FROM system.dictionaries WHERE name = 'dict' AND database = 'test2'"
    select_restored_rows = "SELECT * FROM test2.tbl ORDER BY a"
    insert_second_batch = (
        "INSERT INTO test2.tbl (a, b) SELECT number, number + 1 FROM numbers(5, 5)"
    )

    # Rows written before the backup: c = dictGet(..., b), and `src` only has keys 1..3 and 6..8,
    # so b = 4 and b = 5 fall back to the dictionary attribute default -1.
    rows_from_backup = [[0, 1, 1], [1, 2, 4], [2, 3, 9], [3, 4, -1], [4, 5, -1]]
    # Rows written by the INSERT executed after the restore (b = 6..10).
    rows_inserted_later = [[5, 6, 36], [6, 7, 49], [7, 8, 64], [8, 9, -1], [9, 10, -1]]

    def dict_status():
        # Current loading status of the restored dictionary `test2.dict`.
        return instance.query(dict_status_query)

    # Source table for the dictionary, filled with two separate key ranges.
    instance.query("CREATE DATABASE test")
    instance.query(
        "CREATE TABLE test.src(key Int64, value Int64) ENGINE=MergeTree ORDER BY key"
    )
    for first_key in (1, 6):
        instance.query(
            f"INSERT INTO test.src SELECT number as key, number * number AS value FROM numbers({first_key}, 3)"
        )

    # The dictionary connects back to this server as user `u1`.
    instance.query("CREATE USER u1")
    instance.query("GRANT SELECT ON test.src TO u1")
    instance.query(
        "CREATE DICTIONARY test.dict(key Int64, value Int64 DEFAULT -1) PRIMARY KEY key SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 DB 'test' TABLE 'src' USER u1)) LIFETIME(0) LAYOUT(FLAT())"
    )

    # Table whose column `c` gets its default from the dictionary.
    instance.query(
        "CREATE TABLE test.tbl(a Int64, b Int64 DEFAULT 0, c Int64 DEFAULT dictGet(test.dict, 'value', b)) ENGINE=MergeTree ORDER BY a"
    )
    instance.query(
        "INSERT INTO test.tbl (a, b) SELECT number, number + 1 FROM numbers(5)"
    )

    # Back up users together with the database, drop the user, restore under a new database name.
    backup_name = new_backup_name()
    instance.query(f"BACKUP TABLE system.users, DATABASE test TO {backup_name}")
    instance.query("DROP USER u1")
    instance.query(
        f"RESTORE TABLE system.users, DATABASE test AS test2 FROM {backup_name}"
    )

    # RESTORE should not try to load dictionary `test2.dict`
    assert instance.query(select_restored_rows) == TSV(rows_from_backup)
    assert dict_status() == "NOT_LOADED\n"

    # INSERT needs dictionary `test2.dict` and it will cause loading it.
    # User `u1` has no privileges for reading `test2.src`
    error = "necessary to have the grant SELECT(key, value) ON test2.src"
    assert error in instance.query_and_get_error(insert_second_batch)
    assert dict_status() == "FAILED\n"

    # Once `u1` may read `test2.src`, the dictionary loads and the same INSERT succeeds.
    instance.query("GRANT SELECT ON test2.src TO u1")
    instance.query("SYSTEM RELOAD DICTIONARY test2.dict")
    assert dict_status() == "LOADED\n"

    instance.query(insert_second_batch)
    assert instance.query(select_restored_rows) == TSV(
        rows_from_backup + rows_inserted_later
    )
def test_system_functions():
instance.query("CREATE FUNCTION linear_equation AS (x, k, b) -> k*x + b;")

View File

@ -62,10 +62,13 @@ DROP TABLE tab;
SET allow_nonconst_timezone_arguments = 1;
CREATE TABLE tab (`country` LowCardinality(FixedString(7)) DEFAULT 'unknown', `city` LowCardinality(String) DEFAULT 'unknown', `region` LowCardinality(String) DEFAULT 'unknown', `continent` LowCardinality(FixedString(7)) DEFAULT 'unknown', `is_eu_country` Bool, `date` DateTime CODEC(DoubleDelta, LZ4), `viewer_date` DateTime ALIAS toTimezone(date, timezone), `device_browser` LowCardinality(String) DEFAULT 'unknown', `metro_code` LowCardinality(String) DEFAULT 'unknown', `domain` String DEFAULT 'unknown', `device_platform` LowCardinality(String) DEFAULT 'unknown', `device_type` LowCardinality(String) DEFAULT 'unknown', `device_vendor` LowCardinality(String) DEFAULT 'unknown', `ip` FixedString(39) DEFAULT 'unknown', `lat` Decimal(8, 6) CODEC(T64), `lng` Decimal(9, 6) CODEC(T64), `asset_id` String DEFAULT 'unknown', `is_personalized` Bool, `metric` String, `origin` String DEFAULT 'unknown', `product_id` UInt64 CODEC(T64), `referer` String DEFAULT 'unknown', `server_side` Int8 CODEC(T64), `third_party_id` String DEFAULT 'unknown', `partner_slug` LowCardinality(FixedString(10)) DEFAULT 'unknown', `user_agent` String DEFAULT 'unknown', `user_id` UUID, `zip` FixedString(10) DEFAULT 'unknown', `timezone` LowCardinality(String), `as_organization` LowCardinality(String) DEFAULT 'unknown', `content_cat` Array(String), `playback_method` LowCardinality(String) DEFAULT 'unknown', `store_id` LowCardinality(String) DEFAULT 'unknown', `store_url` String DEFAULT 'unknown', `timestamp` Nullable(DateTime), `ad_count` Int8 CODEC(T64), `ad_type` LowCardinality(FixedString(10)) DEFAULT 'unknown', `ad_categories` Array(FixedString(8)), `blocked_ad_categories` Array(FixedString(8)), `break_max_ad_length` Int8 CODEC(T64), `break_max_ads` Int8 CODEC(T64), `break_max_duration` Int8 CODEC(T64), `break_min_ad_length` Int8 CODEC(T64), `break_position` LowCardinality(FixedString(18)) DEFAULT 'unknown', `media_playhead` String DEFAULT 'unknown', `placement_type` Int8 CODEC(T64), `transaction_id` 
String, `universal_ad_id` Array(String), `client_ua` LowCardinality(String) DEFAULT 'unknown', `device_ip` FixedString(39) DEFAULT 'unknown', `device_ua` LowCardinality(String) DEFAULT 'unknown', `ifa` String, `ifa_type` LowCardinality(String) DEFAULT 'unknown', `vast_lat` Decimal(8, 6) CODEC(T64), `vast_long` Decimal(9, 6) CODEC(T64), `server_ua` String DEFAULT 'unknown', `app_bundle` String DEFAULT 'unknown', `page_url` String DEFAULT 'unknown', `api_framework` Array(UInt8), `click_type` LowCardinality(String), `extensions` Array(String), `media_mime` Array(String), `om_id_partner` LowCardinality(String) DEFAULT 'unknown', `player_capabilities` Array(FixedString(12)), `vast_versions` Array(UInt8), `verification_vendors` Array(String), `ad_play_head` String DEFAULT 'unknown', `ad_serving_id` String DEFAULT 'unknown', `asset_uri` String DEFAULT 'unknown', `content_id` String DEFAULT 'unknown', `content_uri` String DEFAULT 'unknown', `inventory_state` Array(FixedString(14)), `player_size` Array(UInt8), `player_state` Array(FixedString(12)), `pod_sequence` Int8 CODEC(T64), `click_position` Array(UInt32), `error_code` Int16 CODEC(T64), `error_reason` Int8 CODEC(T64), `gdpr_consent` String DEFAULT 'unknown', `limited_tracking` Bool, `regulations` String DEFAULT 'unknown', `content_category` Array(String), PROJECTION projection_TPAG_VAST_date (SELECT * ORDER BY toYYYYMMDD(date), metric, product_id, asset_id)) ENGINE = MergeTree ORDER BY (product_id, metric, asset_id, toYYYYMMDD(date));
DETACH TABLE tab;
ATTACH TABLE tab SETTINGS allow_nonconst_timezone_arguments = 0; -- { serverError ILLEGAL_COLUMN }
ATTACH TABLE tab SETTINGS allow_nonconst_timezone_arguments = 1;
SET allow_nonconst_timezone_arguments = 0;
-- ATTACH TABLE doesn't check the default expressions
ATTACH TABLE tab;
DROP TABLE tab;
-- CREATE TABLE does check the default expressions, so the following is expected to fail:
CREATE TABLE tab (`country` LowCardinality(FixedString(7)) DEFAULT 'unknown', `city` LowCardinality(String) DEFAULT 'unknown', `region` LowCardinality(String) DEFAULT 'unknown', `continent` LowCardinality(FixedString(7)) DEFAULT 'unknown', `is_eu_country` Bool, `date` DateTime CODEC(DoubleDelta, LZ4), `viewer_date` DateTime ALIAS toTimezone(date, timezone), `device_browser` LowCardinality(String) DEFAULT 'unknown', `metro_code` LowCardinality(String) DEFAULT 'unknown', `domain` String DEFAULT 'unknown', `device_platform` LowCardinality(String) DEFAULT 'unknown', `device_type` LowCardinality(String) DEFAULT 'unknown', `device_vendor` LowCardinality(String) DEFAULT 'unknown', `ip` FixedString(39) DEFAULT 'unknown', `lat` Decimal(8, 6) CODEC(T64), `lng` Decimal(9, 6) CODEC(T64), `asset_id` String DEFAULT 'unknown', `is_personalized` Bool, `metric` String, `origin` String DEFAULT 'unknown', `product_id` UInt64 CODEC(T64), `referer` String DEFAULT 'unknown', `server_side` Int8 CODEC(T64), `third_party_id` String DEFAULT 'unknown', `partner_slug` LowCardinality(FixedString(10)) DEFAULT 'unknown', `user_agent` String DEFAULT 'unknown', `user_id` UUID, `zip` FixedString(10) DEFAULT 'unknown', `timezone` LowCardinality(String), `as_organization` LowCardinality(String) DEFAULT 'unknown', `content_cat` Array(String), `playback_method` LowCardinality(String) DEFAULT 'unknown', `store_id` LowCardinality(String) DEFAULT 'unknown', `store_url` String DEFAULT 'unknown', `timestamp` Nullable(DateTime), `ad_count` Int8 CODEC(T64), `ad_type` LowCardinality(FixedString(10)) DEFAULT 'unknown', `ad_categories` Array(FixedString(8)), `blocked_ad_categories` Array(FixedString(8)), `break_max_ad_length` Int8 CODEC(T64), `break_max_ads` Int8 CODEC(T64), `break_max_duration` Int8 CODEC(T64), `break_min_ad_length` Int8 CODEC(T64), `break_position` LowCardinality(FixedString(18)) DEFAULT 'unknown', `media_playhead` String DEFAULT 'unknown', `placement_type` Int8 CODEC(T64), `transaction_id` 
String, `universal_ad_id` Array(String), `client_ua` LowCardinality(String) DEFAULT 'unknown', `device_ip` FixedString(39) DEFAULT 'unknown', `device_ua` LowCardinality(String) DEFAULT 'unknown', `ifa` String, `ifa_type` LowCardinality(String) DEFAULT 'unknown', `vast_lat` Decimal(8, 6) CODEC(T64), `vast_long` Decimal(9, 6) CODEC(T64), `server_ua` String DEFAULT 'unknown', `app_bundle` String DEFAULT 'unknown', `page_url` String DEFAULT 'unknown', `api_framework` Array(UInt8), `click_type` LowCardinality(String), `extensions` Array(String), `media_mime` Array(String), `om_id_partner` LowCardinality(String) DEFAULT 'unknown', `player_capabilities` Array(FixedString(12)), `vast_versions` Array(UInt8), `verification_vendors` Array(String), `ad_play_head` String DEFAULT 'unknown', `ad_serving_id` String DEFAULT 'unknown', `asset_uri` String DEFAULT 'unknown', `content_id` String DEFAULT 'unknown', `content_uri` String DEFAULT 'unknown', `inventory_state` Array(FixedString(14)), `player_size` Array(UInt8), `player_state` Array(FixedString(12)), `pod_sequence` Int8 CODEC(T64), `click_position` Array(UInt32), `error_code` Int16 CODEC(T64), `error_reason` Int8 CODEC(T64), `gdpr_consent` String DEFAULT 'unknown', `limited_tracking` Bool, `regulations` String DEFAULT 'unknown', `content_category` Array(String), PROJECTION projection_TPAG_VAST_date (SELECT * ORDER BY toYYYYMMDD(date), metric, product_id, asset_id)) ENGINE = MergeTree ORDER BY (product_id, metric, asset_id, toYYYYMMDD(date)); -- { serverError ILLEGAL_COLUMN }