Merge branch 'ClickHouse:master' into master

iceFireser 2024-08-24 17:18:54 +08:00 committed by GitHub
commit 6afe3fc500
35 changed files with 488 additions and 303 deletions

View File

@ -80,7 +80,7 @@ For partitioning by month, use the `toYYYYMM(date_column)` expression, where `da
`PRIMARY KEY` — The primary key if it [differs from the sorting key](#choosing-a-primary-key-that-differs-from-the-sorting-key). Optional.
Specifying a sorting key (using `ORDER BY` clause) implicitly specifies a primary key.
It is usually not necessary to specify the primary key in addition to the primary key.
It is usually not necessary to specify the primary key in addition to the sorting key.
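For illustration (hypothetical table and column names): specifying a sorting key alone is sufficient, because the primary key defaults to it.

CREATE TABLE events
(
    id UInt64,
    ts DateTime
)
ENGINE = MergeTree
ORDER BY (id, ts); -- the primary key is implicitly (id, ts); no separate PRIMARY KEY clause is needed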
#### SAMPLE BY

View File

@ -120,7 +120,7 @@ void RoleCache::collectEnabledRoles(EnabledRoles & enabled_roles, SubscriptionsO
SubscriptionsOnRoles new_subscriptions_on_roles;
new_subscriptions_on_roles.reserve(subscriptions_on_roles.size());
auto get_role_function = [this, &subscriptions_on_roles](const UUID & id) TSA_NO_THREAD_SAFETY_ANALYSIS { return getRole(id, subscriptions_on_roles); };
auto get_role_function = [this, &new_subscriptions_on_roles](const UUID & id) TSA_NO_THREAD_SAFETY_ANALYSIS { return getRole(id, new_subscriptions_on_roles); };
for (const auto & current_role : enabled_roles.params.current_roles)
collectRoles(*new_info, skip_ids, get_role_function, current_role, true, false);

View File

@ -72,11 +72,13 @@ static std::initializer_list<std::pair<ClickHouseVersion, SettingsChangesHistory
{"24.9",
{
{"input_format_try_infer_variants", false, false, "Try to infer Variant type in text formats when there is more than one possible type for column/array elements"},
{"join_output_by_rowlist_perkey_rows_threshold", 0, 5, "The lower limit of per-key average rows in the right table to determine whether to output by row list in hash join."},
{"create_if_not_exists", false, false, "New setting."},
{"allow_materialized_view_with_bad_select", true, true, "Support (but not enable yet) stricter validation in CREATE MATERIALIZED VIEW"},
}
},
{"24.8",
{
{"create_if_not_exists", false, false, "New setting."},
{"rows_before_aggregation", true, true, "Provide exact value for rows_before_aggregation statistic, represents the number of rows read before aggregation"},
{"restore_replace_external_table_functions_to_null", false, false, "New setting."},
{"restore_replace_external_engines_to_null", false, false, "New setting."},
@ -85,7 +87,6 @@ static std::initializer_list<std::pair<ClickHouseVersion, SettingsChangesHistory
{"use_hive_partitioning", false, false, "Allows to use hive partitioning for File, URL, S3, AzureBlobStorage and HDFS engines."},
{"allow_experimental_kafka_offsets_storage_in_keeper", false, false, "Allow the usage of experimental Kafka storage engine that stores the committed offsets in ClickHouse Keeper"},
{"allow_archive_path_syntax", true, true, "Added new setting to allow disabling archive path syntax."},
{"allow_materialized_view_with_bad_select", true, true, "Support (but not enable yet) stricter validation in CREATE MATERIALIZED VIEW"},
{"query_cache_tag", "", "", "New setting for labeling query cache settings."},
{"allow_experimental_time_series_table", false, false, "Added new setting to allow the TimeSeries table engine"},
{"enable_analyzer", 1, 1, "Added an alias to a setting `allow_experimental_analyzer`."},
@ -93,7 +94,6 @@ static std::initializer_list<std::pair<ClickHouseVersion, SettingsChangesHistory
{"allow_experimental_json_type", false, false, "Add new experimental JSON type"},
{"use_json_alias_for_old_object_type", true, false, "Use JSON type alias to create new JSON type"},
{"type_json_skip_duplicated_paths", false, false, "Allow to skip duplicated paths during JSON parsing"},
{"join_output_by_rowlist_perkey_rows_threshold", 0, 5, "The lower limit of per-key average rows in the right table to determine whether to output by row list in hash join."},
{"allow_experimental_vector_similarity_index", false, false, "Added new setting to allow experimental vector similarity indexes"},
{"input_format_try_infer_datetimes_only_datetime64", true, false, "Allow to infer DateTime instead of DateTime64 in data formats"}
}

View File

@ -113,7 +113,15 @@ bool ColumnDescription::operator==(const ColumnDescription & other) const
&& ast_to_str(ttl) == ast_to_str(other.ttl);
}
void ColumnDescription::writeText(WriteBuffer & buf) const
String formatASTStateAware(IAST & ast, IAST::FormatState & state)
{
WriteBufferFromOwnString buf;
IAST::FormatSettings settings(buf, true, false);
ast.formatImpl(settings, state, IAST::FormatStateStacked());
return buf.str();
}
void ColumnDescription::writeText(WriteBuffer & buf, IAST::FormatState & state, bool include_comment) const
{
/// NOTE: Serialization format is insane.
@ -126,20 +134,21 @@ void ColumnDescription::writeText(WriteBuffer & buf) const
writeChar('\t', buf);
DB::writeText(DB::toString(default_desc.kind), buf);
writeChar('\t', buf);
writeEscapedString(queryToString(default_desc.expression), buf);
writeEscapedString(formatASTStateAware(*default_desc.expression, state), buf);
}
if (!comment.empty())
if (!comment.empty() && include_comment)
{
writeChar('\t', buf);
DB::writeText("COMMENT ", buf);
writeEscapedString(queryToString(ASTLiteral(Field(comment))), buf);
auto ast = ASTLiteral(Field(comment));
writeEscapedString(formatASTStateAware(ast, state), buf);
}
if (codec)
{
writeChar('\t', buf);
writeEscapedString(queryToString(codec), buf);
writeEscapedString(formatASTStateAware(*codec, state), buf);
}
if (!settings.empty())
@ -150,21 +159,21 @@ void ColumnDescription::writeText(WriteBuffer & buf) const
ASTSetQuery ast;
ast.is_standalone = false;
ast.changes = settings;
writeEscapedString(queryToString(ast), buf);
writeEscapedString(formatASTStateAware(ast, state), buf);
DB::writeText(")", buf);
}
if (!statistics.empty())
{
writeChar('\t', buf);
writeEscapedString(queryToString(statistics.getAST()), buf);
writeEscapedString(formatASTStateAware(*statistics.getAST(), state), buf);
}
if (ttl)
{
writeChar('\t', buf);
DB::writeText("TTL ", buf);
writeEscapedString(queryToString(ttl), buf);
writeEscapedString(formatASTStateAware(*ttl, state), buf);
}
writeChar('\n', buf);
@ -895,16 +904,17 @@ void ColumnsDescription::resetColumnTTLs()
}
String ColumnsDescription::toString() const
String ColumnsDescription::toString(bool include_comments) const
{
WriteBufferFromOwnString buf;
IAST::FormatState ast_format_state;
writeCString("columns format version: 1\n", buf);
DB::writeText(columns.size(), buf);
writeCString(" columns:\n", buf);
for (const ColumnDescription & column : columns)
column.writeText(buf);
column.writeText(buf, ast_format_state, include_comments);
return buf.str();
}
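The reason for threading a single IAST::FormatState through every column's writeText: IAST formatting tracks aliases it has already printed, so an alias expression such as `expr AS event` that appears in several column defaults serializes consistently, keeping the column metadata written to ZooKeeper identical to the metadata written on disk. The DDL from the integration test added later in this commit exercises exactly this case (table name and ZooKeeper path taken from that test):

CREATE TABLE t
(
    data String,
    col1 String DEFAULT JSONExtractString(JSONExtractString(data, 'event') AS event, 'col1-key'),
    col2 String MATERIALIZED JSONExtractString(JSONExtractString(data, 'event') AS event, 'col2-key')
)
ENGINE = ReplicatedMergeTree('/test/t', '{replica}')
ORDER BY col1;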

View File

@ -104,7 +104,7 @@ struct ColumnDescription
bool operator==(const ColumnDescription & other) const;
bool operator!=(const ColumnDescription & other) const { return !(*this == other); }
void writeText(WriteBuffer & buf) const;
void writeText(WriteBuffer & buf, IAST::FormatState & state, bool include_comment) const;
void readText(ReadBuffer & buf);
};
@ -137,7 +137,7 @@ public:
/// NOTE Must correspond with Nested::flatten function.
void flattenNested(); /// TODO: remove, insert already flattened Nested columns.
bool operator==(const ColumnsDescription & other) const { return columns == other.columns; }
bool operator==(const ColumnsDescription & other) const { return toString(false) == other.toString(false); }
bool operator!=(const ColumnsDescription & other) const { return !(*this == other); }
auto begin() const { return columns.begin(); }
@ -221,7 +221,7 @@ public:
/// Does column has non default specified compression codec
bool hasCompressionCodec(const String & column_name) const;
String toString() const;
String toString(bool include_comments = true) const;
static ColumnsDescription parse(const String & str);
size_t size() const

View File

@ -444,8 +444,8 @@ StorageHive::StorageHive(
storage_metadata.setComment(comment_);
storage_metadata.partition_key = KeyDescription::getKeyFromAST(partition_by_ast, storage_metadata.columns, getContext());
setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.columns, getContext()));
setInMemoryMetadata(storage_metadata);
setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns(), getContext()));
}
void StorageHive::lazyInitialize()

View File

@ -94,7 +94,7 @@ StorageObjectStorage::StorageObjectStorage(
if (sample_path.empty() && context->getSettingsRef().use_hive_partitioning)
sample_path = getPathSample(metadata, context);
setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(metadata.getColumns(), context, sample_path, format_settings));
setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(metadata.columns, context, sample_path, format_settings));
setInMemoryMetadata(metadata);
}

View File

@ -68,7 +68,7 @@ StorageObjectStorageCluster::StorageObjectStorageCluster(
if (sample_path.empty() && context_->getSettingsRef().use_hive_partitioning)
sample_path = getPathSample(metadata, context_);
setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(metadata.getColumns(), context_, sample_path));
setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(metadata.columns, context_, sample_path));
setInMemoryMetadata(metadata);
}

View File

@ -208,7 +208,7 @@ Chunk StorageObjectStorageSource::generate()
.filename = &filename,
.last_modified = object_info->metadata->last_modified,
.etag = &(object_info->metadata->etag)
}, getContext(), read_from_format_info.columns_description);
}, getContext());
const auto & partition_columns = configuration->getPartitionColumns();
if (!partition_columns.empty() && chunk_size && chunk.hasColumns())
@ -280,7 +280,7 @@ StorageObjectStorageSource::ReaderHolder StorageObjectStorageSource::createReade
const std::shared_ptr<IIterator> & file_iterator,
const ConfigurationPtr & configuration,
const ObjectStoragePtr & object_storage,
const ReadFromFormatInfo & read_from_format_info,
ReadFromFormatInfo & read_from_format_info,
const std::optional<FormatSettings> & format_settings,
const std::shared_ptr<const KeyCondition> & key_condition_,
const ContextPtr & context_,

View File

@ -74,7 +74,7 @@ protected:
const UInt64 max_block_size;
const bool need_only_count;
const size_t max_parsing_threads;
const ReadFromFormatInfo read_from_format_info;
ReadFromFormatInfo read_from_format_info;
const std::shared_ptr<ThreadPool> create_reader_pool;
std::shared_ptr<IIterator> file_iterator;
@ -122,7 +122,7 @@ protected:
const std::shared_ptr<IIterator> & file_iterator,
const ConfigurationPtr & configuration,
const ObjectStoragePtr & object_storage,
const ReadFromFormatInfo & read_from_format_info,
ReadFromFormatInfo & read_from_format_info,
const std::optional<FormatSettings> & format_settings,
const std::shared_ptr<const KeyCondition> & key_condition_,
const ContextPtr & context_,

View File

@ -524,7 +524,7 @@ Chunk ObjectStorageQueueSource::generateImpl()
{
.path = path,
.size = reader.getObjectInfo()->metadata->size_bytes
}, getContext(), read_from_format_info.columns_description);
}, getContext());
return chunk;
}

View File

@ -128,7 +128,7 @@ private:
const std::shared_ptr<FileIterator> file_iterator;
const ConfigurationPtr configuration;
const ObjectStoragePtr object_storage;
const ReadFromFormatInfo read_from_format_info;
ReadFromFormatInfo read_from_format_info;
const std::optional<FormatSettings> format_settings;
const ObjectStorageQueueSettings queue_settings;
const std::shared_ptr<ObjectStorageQueueMetadata> files_metadata;

View File

@ -169,7 +169,7 @@ StorageObjectStorageQueue::StorageObjectStorageQueue(
storage_metadata.setColumns(columns);
storage_metadata.setConstraints(constraints_);
storage_metadata.setComment(comment);
setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns(), context_));
setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.columns, context_));
setInMemoryMetadata(storage_metadata);
LOG_INFO(log, "Using zookeeper path: {}", zk_path.string());

View File

@ -1112,9 +1112,9 @@ void StorageFile::setStorageMetadata(CommonArguments args)
storage_metadata.setConstraints(args.constraints);
storage_metadata.setComment(args.comment);
setInMemoryMetadata(storage_metadata);
setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns(), args.getContext(), paths.empty() ? "" : paths[0], format_settings));
setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.columns, args.getContext(), paths.empty() ? "" : paths[0], format_settings));
setInMemoryMetadata(storage_metadata);
}
@ -1468,7 +1468,7 @@ Chunk StorageFileSource::generate()
.size = current_file_size,
.filename = (filename_override.has_value() ? &filename_override.value() : nullptr),
.last_modified = current_file_last_modified
}, getContext(), columns_description);
}, getContext());
return chunk;
}

View File

@ -60,8 +60,8 @@ StorageFileCluster::StorageFileCluster(
}
storage_metadata.setConstraints(constraints_);
setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.columns, context, paths.empty() ? "" : paths[0]));
setInMemoryMetadata(storage_metadata);
setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns(), context, paths.empty() ? "" : paths[0]));
}
void StorageFileCluster::updateQueryToSendIfNeeded(DB::ASTPtr & query, const StorageSnapshotPtr & storage_snapshot, const DB::ContextPtr & context)

View File

@ -6340,7 +6340,7 @@ void StorageReplicatedMergeTree::alter(
"Metadata on replica is not up to date with common metadata in Zookeeper. "
"It means that this replica still not applied some of previous alters."
" Probably too many alters executing concurrently (highly not recommended). "
"You can retry the query");
"You can retry this error");
/// Cannot retry automatically, because some zookeeper ops were lost on the first attempt. Will retry on DDLWorker-level.
if (query_context->getZooKeeperMetadataTransaction())

View File

@ -165,9 +165,9 @@ IStorageURLBase::IStorageURLBase(
storage_metadata.setConstraints(constraints_);
storage_metadata.setComment(comment);
setInMemoryMetadata(storage_metadata);
setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns(), context_, getSampleURI(uri, context_), format_settings));
setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.columns, context_, getSampleURI(uri, context_), format_settings));
setInMemoryMetadata(storage_metadata);
}
@ -435,7 +435,7 @@ Chunk StorageURLSource::generate()
{
.path = curr_uri.getPath(),
.size = current_file_size,
}, getContext(), columns_description);
}, getContext());
return chunk;
}

View File

@ -75,8 +75,8 @@ StorageURLCluster::StorageURLCluster(
}
storage_metadata.setConstraints(constraints_);
setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.columns, context, getSampleURI(uri, context)));
setInMemoryMetadata(storage_metadata);
setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns(), context, getSampleURI(uri, context)));
}
void StorageURLCluster::updateQueryToSendIfNeeded(ASTPtr & query, const StorageSnapshotPtr & storage_snapshot, const ContextPtr & context)

View File

@ -129,36 +129,45 @@ NameSet getVirtualNamesForFileLikeStorage()
return {"_path", "_file", "_size", "_time", "_etag"};
}
std::unordered_map<std::string, std::string> parseHivePartitioningKeysAndValues(const String & path, const ColumnsDescription & storage_columns)
std::unordered_map<std::string, std::string> parseHivePartitioningKeysAndValues(const String & path)
{
std::string pattern = "([^/]+)=([^/]+)/";
re2::StringPiece input_piece(path);
std::unordered_map<std::string, std::string> key_values;
std::string key, value;
std::unordered_set<String> used_keys;
std::unordered_map<std::string, std::string> used_keys;
while (RE2::FindAndConsume(&input_piece, pattern, &key, &value))
{
if (used_keys.contains(key))
throw Exception(ErrorCodes::INCORRECT_DATA, "Path '{}' to file with enabled hive-style partitioning contains duplicated partition key {}, only unique keys are allowed", path, key);
used_keys.insert(key);
auto it = used_keys.find(key);
if (it != used_keys.end() && it->second != value)
throw Exception(ErrorCodes::INCORRECT_DATA, "Path '{}' to file with enabled hive-style partitioning contains duplicated partition key {} with different values, only unique keys are allowed", path, key);
used_keys.insert({key, value});
auto col_name = "_" + key;
while (storage_columns.has(col_name))
col_name = "_" + col_name;
auto col_name = key;
key_values[col_name] = value;
}
return key_values;
}
VirtualColumnsDescription getVirtualsForFileLikeStorage(const ColumnsDescription & storage_columns, const ContextPtr & context, const std::string & path, std::optional<FormatSettings> format_settings_)
VirtualColumnsDescription getVirtualsForFileLikeStorage(ColumnsDescription & storage_columns, const ContextPtr & context, const std::string & path, std::optional<FormatSettings> format_settings_)
{
VirtualColumnsDescription desc;
auto add_virtual = [&](const auto & name, const auto & type)
{
if (storage_columns.has(name))
{
if (!context->getSettingsRef().use_hive_partitioning)
return;
if (storage_columns.size() == 1)
throw Exception(ErrorCodes::INCORRECT_DATA, "Cannot use hive partitioning for file {}: it contains only partition columns. Disable use_hive_partitioning setting to read this file", path);
auto local_type = storage_columns.get(name).type;
storage_columns.remove(name);
desc.addEphemeral(name, local_type, "");
return;
}
desc.addEphemeral(name, type, "");
};
@ -171,7 +180,7 @@ VirtualColumnsDescription getVirtualsForFileLikeStorage(const ColumnsDescription
if (context->getSettingsRef().use_hive_partitioning)
{
auto map = parseHivePartitioningKeysAndValues(path, storage_columns);
auto map = parseHivePartitioningKeysAndValues(path);
auto format_settings = format_settings_ ? *format_settings_ : getFormatSettings(context);
for (auto & item : map)
{
@ -244,11 +253,11 @@ ColumnPtr getFilterByPathAndFileIndexes(const std::vector<String> & paths, const
void addRequestedFileLikeStorageVirtualsToChunk(
Chunk & chunk, const NamesAndTypesList & requested_virtual_columns,
VirtualsForFileLikeStorage virtual_values, ContextPtr context, const ColumnsDescription & columns)
VirtualsForFileLikeStorage virtual_values, ContextPtr context)
{
std::unordered_map<std::string, std::string> hive_map;
if (context->getSettingsRef().use_hive_partitioning)
hive_map = parseHivePartitioningKeysAndValues(virtual_values.path, columns);
hive_map = parseHivePartitioningKeysAndValues(virtual_values.path);
for (const auto & virtual_column : requested_virtual_columns)
{

View File

@ -70,7 +70,7 @@ auto extractSingleValueFromBlock(const Block & block, const String & name)
NameSet getVirtualNamesForFileLikeStorage();
VirtualColumnsDescription getVirtualsForFileLikeStorage(
const ColumnsDescription & storage_columns,
ColumnsDescription & storage_columns,
const ContextPtr & context,
const std::string & sample_path = "",
std::optional<FormatSettings> format_settings_ = std::nullopt);
@ -105,7 +105,7 @@ struct VirtualsForFileLikeStorage
void addRequestedFileLikeStorageVirtualsToChunk(
Chunk & chunk, const NamesAndTypesList & requested_virtual_columns,
VirtualsForFileLikeStorage virtual_values, ContextPtr context, const ColumnsDescription & columns);
VirtualsForFileLikeStorage virtual_values, ContextPtr context);
}
}
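The user-visible effect of these VirtualColumnUtils changes, adapted from the tests updated later in this commit (paths shortened): hive-style partition keys are now exposed under their plain names instead of as underscore-prefixed virtual columns, and a path may repeat a partition key only with an identical value.

SET use_hive_partitioning = 1;
-- the partition key 'column0' is read by its own name; a '_column0' virtual column is no longer created:
SELECT *, column0 FROM file('data_hive/partitioning/column0=Elizabeth/sample.parquet') LIMIT 10;
-- repeating a key with a different value in the path now throws INCORRECT_DATA:
SELECT * FROM file('data_hive/partitioning/column0=Elizabeth/column0=Elizabeth1/sample.parquet');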

View File

@ -60,7 +60,6 @@ MESSAGES_TO_RETRY = [
"is already started to be removing by another replica right now",
# This is from LSan, and it indicates its own internal problem:
"Unable to get registers from thread",
"You can retry",
]
MAX_RETRIES = 3

View File

@ -0,0 +1,26 @@
<clickhouse>
<keeper_server>
<tcp_port>2181</tcp_port>
<server_id>1</server_id>
<log_storage_path>/var/lib/clickhouse/coordination/log</log_storage_path>
<snapshot_storage_path>/var/lib/clickhouse/coordination/snapshots</snapshot_storage_path>
<coordination_settings>
<session_timeout_ms>20000</session_timeout_ms>
</coordination_settings>
<raft_configuration>
<server>
<id>1</id>
<hostname>localhost</hostname>
<port>9444</port>
</server>
</raft_configuration>
</keeper_server>
<zookeeper>
<node index="1">
<host>localhost</host>
<port>2181</port>
</node>
<session_timeout_ms>20000</session_timeout_ms>
</zookeeper>
</clickhouse>

View File

@ -0,0 +1,8 @@
<clickhouse>
<users>
<default>
<profile>default</profile>
<no_password></no_password>
</default>
</users>
</clickhouse>

View File

@ -0,0 +1,71 @@
import pytest
import random
import string
from helpers.cluster import ClickHouseCluster
cluster = ClickHouseCluster(__file__)
node = cluster.add_instance(
"node",
main_configs=[
"config/enable_keeper.xml",
"config/users.xml",
],
stay_alive=True,
with_minio=True,
macros={"shard": 1, "replica": 1},
)
@pytest.fixture(scope="module")
def start_cluster():
try:
cluster.start()
yield cluster
finally:
cluster.shutdown()
def randomize_table_name(table_name, random_suffix_length=10):
letters = string.ascii_letters + string.digits
return f"{table_name}_{''.join(random.choice(letters) for _ in range(random_suffix_length))}"
@pytest.mark.parametrize("engine", ["ReplicatedMergeTree"])
def test_aliases_in_default_expr_not_break_table_structure(start_cluster, engine):
"""
Make sure that using aliases in column default expressions does not lead to different column metadata in ZooKeeper and on disk.
Issue: https://github.com/ClickHouse/clickhouse-private/issues/5150
"""
data = '{"event": {"col1-key": "col1-val", "col2-key": "col2-val"}}'
table_name = randomize_table_name("t")
node.query(
f"""
DROP TABLE IF EXISTS {table_name};
CREATE TABLE {table_name}
(
`data` String,
`col1` String DEFAULT JSONExtractString(JSONExtractString(data, 'event') AS event, 'col1-key'),
`col2` String MATERIALIZED JSONExtractString(JSONExtractString(data, 'event') AS event, 'col2-key')
)
ENGINE = {engine}('/test/{table_name}', '{{replica}}')
ORDER BY col1
"""
)
node.restart_clickhouse()
node.query(
f"""
INSERT INTO {table_name} (data) VALUES ('{data}');
"""
)
assert node.query(f"SELECT data FROM {table_name}").strip() == data
assert node.query(f"SELECT col1 FROM {table_name}").strip() == "col1-val"
assert node.query(f"SELECT col2 FROM {table_name}").strip() == "col2-val"
node.query(f"DROP TABLE {table_name}")

View File

@ -1,5 +1,6 @@
import time
import pytest
import random
from helpers.client import QueryRuntimeException
from helpers.cluster import ClickHouseCluster
from helpers.test_tools import TSV
@ -418,72 +419,215 @@ def test_function_current_roles():
)
def test_role_expiration():
instance.query("CREATE USER ure")
@pytest.mark.parametrize("with_extra_role", [False, True])
def test_role_expiration(with_extra_role):
instance.query("CREATE ROLE rre")
instance.query("GRANT rre TO ure")
instance.query("CREATE USER ure DEFAULT ROLE rre")
instance.query("CREATE TABLE IF NOT EXISTS tre (id Int) Engine=Log")
instance.query("INSERT INTO tre VALUES (0)")
instance.query("CREATE TABLE table1 (id Int) Engine=Log")
instance.query("CREATE TABLE table2 (id Int) Engine=Log")
instance.query("INSERT INTO table1 VALUES (1)")
instance.query("INSERT INTO table2 VALUES (2)")
instance.query("GRANT SELECT ON table1 TO rre")
assert instance.query("SELECT * FROM table1", user="ure") == "1\n"
assert "Not enough privileges" in instance.query_and_get_error(
"SELECT * FROM tre", user="ure"
"SELECT * FROM table2", user="ure"
)
instance.query("GRANT SELECT ON tre TO rre")
assert instance.query("SELECT * FROM tre", user="ure") == "0\n"
# access_control_improvements/role_cache_expiration_time_seconds value is 2 for the test
# so we wait >2 seconds until the role is expired
time.sleep(5)
instance.query("CREATE TABLE IF NOT EXISTS tre1 (id Int) Engine=Log")
instance.query("INSERT INTO tre1 VALUES (0)")
instance.query("GRANT SELECT ON tre1 TO rre")
if with_extra_role:
# Expiration of role "rre" from the role cache can be caused by another role being used.
instance.query("CREATE ROLE extra_role")
instance.query("CREATE USER extra_user DEFAULT ROLE extra_role")
instance.query("GRANT SELECT ON table1 TO extra_role")
assert instance.query("SELECT * FROM table1", user="extra_user") == "1\n"
assert instance.query("SELECT * from tre1", user="ure") == "0\n"
instance.query("GRANT SELECT ON table2 TO rre")
assert instance.query("SELECT * FROM table1", user="ure") == "1\n"
assert instance.query("SELECT * FROM table2", user="ure") == "2\n"
instance.query("DROP USER ure")
instance.query("DROP ROLE rre")
instance.query("DROP TABLE tre")
instance.query("DROP TABLE tre1")
instance.query("DROP USER ure")
instance.query("DROP TABLE table1")
instance.query("DROP TABLE table2")
if with_extra_role:
instance.query("DROP ROLE extra_role")
instance.query("DROP USER extra_user")
def test_two_roles_expiration():
instance.query("CREATE USER ure")
instance.query("CREATE ROLE rre")
instance.query("GRANT rre TO ure")
def test_roles_cache():
# This test takes 20 seconds.
test_time = 20
instance.query("CREATE ROLE rre_second")
instance.query("CREATE TABLE IF NOT EXISTS tre (id Int) Engine=Log")
instance.query("INSERT INTO tre VALUES (0)")
assert "Not enough privileges" in instance.query_and_get_error(
"SELECT * FROM tre", user="ure"
)
instance.query("GRANT SELECT ON tre TO rre")
assert instance.query("SELECT * FROM tre", user="ure") == "0\n"
# access_control_improvements/role_cache_expiration_time_seconds value is 2 for the test
# so we wait >2 seconds until the roles are expired
time.sleep(5)
# Three users A, B, C.
users = ["A", "B", "C"]
instance.query("CREATE USER " + ", ".join(users))
# Table "tbl" has 10 columns. Each of the users has access to a different set of columns.
num_columns = 10
columns = [f"x{i}" for i in range(1, num_columns + 1)]
columns_with_types = [column + " Int64" for column in columns]
columns_with_types_comma_separated = ", ".join(columns_with_types)
values = list(range(1, num_columns + 1))
values_comma_separated = ", ".join([str(value) for value in values])
instance.query(
"GRANT SELECT ON tre1 TO rre_second"
) # we expect that both rre and rre_second are gone from cache upon this operation
f"CREATE TABLE tbl ({columns_with_types_comma_separated}) ENGINE=MergeTree ORDER BY tuple()"
)
instance.query(f"INSERT INTO tbl VALUES ({values_comma_separated})")
columns_to_values = dict([(f"x{i}", i) for i in range(1, num_columns + 1)])
instance.query("CREATE TABLE IF NOT EXISTS tre1 (id Int) Engine=Log")
instance.query("INSERT INTO tre1 VALUES (0)")
instance.query("GRANT SELECT ON tre1 TO rre")
# In this test we create and modify roles multiple times along with updating the following variables.
# Then we check that each of the users has access to the expected set of columns.
roles = []
users_to_roles = dict([(user, []) for user in users])
roles_to_columns = {}
assert instance.query("SELECT * from tre1", user="ure") == "0\n"
# Checks that each of the users can access the expected set of columns and can't access other columns.
def check():
for user in random.sample(users, len(users)):
expected_roles = users_to_roles[user]
expected_columns = list(
set(sum([roles_to_columns[role] for role in expected_roles], []))
)
expected_result = sorted(
[columns_to_values[column] for column in expected_columns]
)
query = " UNION ALL ".join(
[
f"SELECT * FROM viewIfPermitted(SELECT {column} AS c FROM tbl ELSE null('c Int64'))"
for column in columns
]
)
result = instance.query(query, user=user).splitlines()
result = sorted([int(value) for value in result])
ok = result == expected_result
if not ok:
print(f"Show grants for {user}:")
print(
instance.query(
"SHOW GRANTS FOR " + ", ".join([user] + expected_roles)
)
)
print(f"Expected result: {expected_result}")
print(f"Got unexpected result: {result}")
assert ok
instance.query("DROP USER ure")
instance.query("DROP ROLE rre")
instance.query("DROP ROLE rre_second")
instance.query("DROP TABLE tre")
instance.query("DROP TABLE tre1")
# Grants one of our roles a permission to access one of the columns.
def grant_column():
columns_used_in_roles = sum(roles_to_columns.values(), [])
columns_to_choose = [
column for column in columns if column not in columns_used_in_roles
]
if not columns_to_choose or not roles:
return False
column = random.choice(columns_to_choose)
role = random.choice(roles)
instance.query(f"GRANT SELECT({column}) ON tbl TO {role}")
roles_to_columns[role].append(column)
return True
# Revokes the permission to access one of the granted columns from all our roles.
def revoke_column():
columns_used_in_roles = sum(roles_to_columns.values(), [])
columns_to_choose = list(set(columns_used_in_roles))
if not columns_to_choose or not roles:
return False
column = random.choice(columns_to_choose)
roles_str = ", ".join(roles)
instance.query(f"REVOKE SELECT({column}) ON tbl FROM {roles_str}")
for role in roles_to_columns:
if column in roles_to_columns[role]:
roles_to_columns[role].remove(column)
return True
# Creates a role and grants it to one of the users.
def create_role():
for role in ["R1", "R2", "R3"]:
if role not in roles:
instance.query(f"CREATE ROLE {role}")
roles.append(role)
if role not in roles_to_columns:
roles_to_columns[role] = []
if "R1" not in users_to_roles["A"]:
instance.query("GRANT R1 TO A")
users_to_roles["A"].append("R1")
elif "R2" not in users_to_roles["B"]:
instance.query("GRANT R2 TO B")
users_to_roles["B"].append("R2")
elif "R3" not in users_to_roles["B"]:
instance.query("GRANT R3 TO R2")
users_to_roles["B"].append("R3")
elif "R3" not in users_to_roles["C"]:
instance.query("GRANT R3 TO C")
users_to_roles["C"].append("R3")
else:
return False
return True
# Drops one of our roles.
def drop_role():
if not roles:
return False
role = random.choice(roles)
instance.query(f"DROP ROLE {role}")
roles.remove(role)
for u in users_to_roles:
if role in users_to_roles[u]:
users_to_roles[u].remove(role)
del roles_to_columns[role]
if (role == "R2") and ("R3" in users_to_roles["B"]):
users_to_roles["B"].remove("R3")
return True
# Modifies some grants or roles randomly.
def modify():
while True:
rnd = random.random()
if rnd < 0.4:
if grant_column():
break
elif rnd < 0.5:
if revoke_column():
break
elif rnd < 0.9:
if create_role():
break
else:
if drop_role():
break
def maybe_modify():
if random.random() < 0.9:
modify()
modify()
# Sleeping is necessary in this test because the role cache in ClickHouse has an expiration timeout.
def maybe_sleep():
if random.random() < 0.1:
# "role_cache_expiration_time_seconds" is set to 2 seconds in the test configuration.
# We need a sleep longer than that in this test sometimes.
seconds = random.random() * 5
print(f"Sleeping {seconds} seconds")
time.sleep(seconds)
# Main part of the test.
start_time = time.time()
end_time = start_time + test_time
while time.time() < end_time:
check()
maybe_sleep()
maybe_modify()
maybe_sleep()
check()
instance.query("DROP USER " + ", ".join(users))
instance.query("DROP ROLE " + ", ".join(roles))
instance.query("DROP TABLE tbl")

View File

@ -1513,19 +1513,19 @@ def test_hive_partitioning_with_one_parameter(cluster):
azure_query(
node,
f"INSERT INTO TABLE FUNCTION azureBlobStorage(azure_conf2, storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}',"
f" container='cont', blob_path='{path}', format='CSV', compression='auto', structure='{table_format}') VALUES {values}",
f" container='cont', blob_path='{path}', format='CSVWithNames', compression='auto', structure='{table_format}') VALUES {values}",
settings={"azure_truncate_on_insert": 1},
)
query = (
f"SELECT column1, column2, _file, _path, _column1 FROM azureBlobStorage(azure_conf2, "
f"SELECT column2, _file, _path, column1 FROM azureBlobStorage(azure_conf2, "
f"storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', container='cont', "
f"blob_path='{path}', format='CSV', structure='{table_format}')"
f"blob_path='{path}', format='CSVWithNames', structure='{table_format}')"
)
assert azure_query(
node, query, settings={"use_hive_partitioning": 1}
).splitlines() == [
"Elizabeth\tGordon\tsample.csv\t{bucket}/{max_path}\tElizabeth".format(
"Gordon\tsample.csv\t{bucket}/{max_path}\tElizabeth".format(
bucket="cont", max_path=path
)
]
@ -1533,14 +1533,14 @@ def test_hive_partitioning_with_one_parameter(cluster):
query = (
f"SELECT column2 FROM azureBlobStorage(azure_conf2, "
f"storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', container='cont', "
f"blob_path='{path}', format='CSV', structure='{table_format}') WHERE column1=_column1;"
f"blob_path='{path}', format='CSVWithNames', structure='{table_format}');"
)
assert azure_query(
node, query, settings={"use_hive_partitioning": 1}
).splitlines() == ["Gordon"]
def test_hive_partitioning_with_two_parameters(cluster):
def test_hive_partitioning_with_all_parameters(cluster):
# type: (ClickHouseCluster) -> None
node = cluster.instances["node"] # type: ClickHouseInstance
table_format = "column1 String, column2 String"
@ -1551,40 +1551,19 @@ def test_hive_partitioning_with_two_parameters(cluster):
azure_query(
node,
f"INSERT INTO TABLE FUNCTION azureBlobStorage(azure_conf2, storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}',"
f" container='cont', blob_path='{path}', format='CSV', compression='auto', structure='{table_format}') VALUES {values_1}, {values_2}",
f" container='cont', blob_path='{path}', format='CSVWithNames', compression='auto', structure='{table_format}') VALUES {values_1}, {values_2}",
settings={"azure_truncate_on_insert": 1},
)
query = (
f"SELECT column1, column2, _file, _path, _column1, _column2 FROM azureBlobStorage(azure_conf2, "
f"SELECT column1, column2, _file, _path FROM azureBlobStorage(azure_conf2, "
f"storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', container='cont', "
f"blob_path='{path}', format='CSV', structure='{table_format}') WHERE column1=_column1;"
f"blob_path='{path}', format='CSVWithNames', structure='{table_format}');"
)
assert azure_query(
node, query, settings={"use_hive_partitioning": 1}
).splitlines() == [
"Elizabeth\tGordon\tsample.csv\t{bucket}/{max_path}\tElizabeth\tGordon".format(
bucket="cont", max_path=path
)
]
pattern = r"DB::Exception: Cannot use hive partitioning for file"
query = (
f"SELECT column1 FROM azureBlobStorage(azure_conf2, "
f"storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', container='cont', "
f"blob_path='{path}', format='CSV', structure='{table_format}') WHERE column2=_column2;"
)
assert azure_query(
node, query, settings={"use_hive_partitioning": 1}
).splitlines() == ["Elizabeth"]
query = (
f"SELECT column1 FROM azureBlobStorage(azure_conf2, "
f"storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', container='cont', "
f"blob_path='{path}', format='CSV', structure='{table_format}') WHERE column2=_column2 AND column1=_column1;"
)
assert azure_query(
node, query, settings={"use_hive_partitioning": 1}
).splitlines() == ["Elizabeth"]
with pytest.raises(Exception, match=pattern):
azure_query(node, query, settings={"use_hive_partitioning": 1})
def test_hive_partitioning_without_setting(cluster):
@ -1593,19 +1572,19 @@ def test_hive_partitioning_without_setting(cluster):
table_format = "column1 String, column2 String"
values_1 = f"('Elizabeth', 'Gordon')"
values_2 = f"('Emilia', 'Gregor')"
path = "a/column1=Elizabeth/column2=Gordon/sample.csv"
path = "a/column1=Elizabeth/column2=Gordon/column3=Gordon/sample.csv"
azure_query(
node,
f"INSERT INTO TABLE FUNCTION azureBlobStorage(azure_conf2, storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}',"
f" container='cont', blob_path='{path}', format='CSV', compression='auto', structure='{table_format}') VALUES {values_1}, {values_2}",
f" container='cont', blob_path='{path}', format='CSVWithNames', compression='auto', structure='{table_format}') VALUES {values_1}, {values_2}",
settings={"azure_truncate_on_insert": 1},
)
query = (
f"SELECT column1, column2, _file, _path, _column1, _column2 FROM azureBlobStorage(azure_conf2, "
f"SELECT column1, column2, _file, _path, column3 FROM azureBlobStorage(azure_conf2, "
f"storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', container='cont', "
f"blob_path='{path}', format='CSV', structure='{table_format}') WHERE column1=_column1;"
f"blob_path='{path}', format='CSVWithNames', structure='{table_format}');"
)
pattern = re.compile(
r"DB::Exception: Unknown expression identifier '.*' in scope.*", re.DOTALL

View File

@ -1259,33 +1259,21 @@ def test_respect_object_existence_on_partitioned_write(started_cluster):
def test_hive_partitioning_with_one_parameter(started_cluster):
hdfs_api = started_cluster.hdfs_api
hdfs_api.write_data(f"/column0=Elizabeth/parquet_1", f"Elizabeth\tGordon\n")
assert hdfs_api.read_data(f"/column0=Elizabeth/parquet_1") == f"Elizabeth\tGordon\n"
hdfs_api.write_data(
f"/column0=Elizabeth/file_1", f"column0,column1\nElizabeth,Gordon\n"
)
assert (
hdfs_api.read_data(f"/column0=Elizabeth/file_1")
== f"column0,column1\nElizabeth,Gordon\n"
)
r = node1.query(
"SELECT _column0 FROM hdfs('hdfs://hdfs1:9000/column0=Elizabeth/parquet_1', 'TSV')",
"SELECT column0 FROM hdfs('hdfs://hdfs1:9000/column0=Elizabeth/file_1', 'CSVWithNames')",
settings={"use_hive_partitioning": 1},
)
assert r == f"Elizabeth\n"
def test_hive_partitioning_with_two_parameters(started_cluster):
hdfs_api = started_cluster.hdfs_api
hdfs_api.write_data(
f"/column0=Elizabeth/column1=Gordon/parquet_2", f"Elizabeth\tGordon\n"
)
assert (
hdfs_api.read_data(f"/column0=Elizabeth/column1=Gordon/parquet_2")
== f"Elizabeth\tGordon\n"
)
r = node1.query(
"SELECT _column1 FROM hdfs('hdfs://hdfs1:9000/column0=Elizabeth/column1=Gordon/parquet_2', 'TSV');",
settings={"use_hive_partitioning": 1},
)
assert r == f"Gordon\n"
def test_hive_partitioning_without_setting(started_cluster):
hdfs_api = started_cluster.hdfs_api
hdfs_api.write_data(
@ -1301,7 +1289,7 @@ def test_hive_partitioning_without_setting(started_cluster):
with pytest.raises(QueryRuntimeException, match=pattern):
node1.query(
f"SELECT _column1 FROM hdfs('hdfs://hdfs1:9000/column0=Elizabeth/column1=Gordon/parquet_2', 'TSV');",
f"SELECT column1 FROM hdfs('hdfs://hdfs1:9000/column0=Elizabeth/column1=Gordon/parquet_2', 'TSV');",
settings={"use_hive_partitioning": 0},
)

View File

@ -26,6 +26,10 @@ while [[ $($CLICKHOUSE_CLIENT --query "KILL MUTATION WHERE mutation_id='00000000
sleep 1
done
while [[ $($CLICKHOUSE_CLIENT --query "SELECT * FROM system.replication_queue WHERE type='ALTER_METADATA' AND database = '$CLICKHOUSE_DATABASE'" 2>&1) ]]; do
sleep 1
done
$CLICKHOUSE_CLIENT --query "SHOW CREATE TABLE table_for_bad_alters;" # Type changed, but we can revert back
$CLICKHOUSE_CLIENT --query "INSERT INTO table_for_bad_alters VALUES(2, 2, 7)"

View File

@ -23,11 +23,11 @@ $CLICKHOUSE_CLIENT --query "
DETACH TABLE r2;
"
$CLICKHOUSE_CLIENT --max_block_size 1 --min_insert_block_size_rows 1 --min_insert_block_size_bytes 1 --max_insert_threads 16 --query "INSERT INTO r1 SELECT * FROM numbers_mt(${SCALE})"
# insert_keeper_fault_injection_probability=0 -- can slow down inserts a lot (produce a lot of parts)
$CLICKHOUSE_CLIENT --insert_keeper_fault_injection_probability=0 --max_block_size 1 --min_insert_block_size_rows 1 --min_insert_block_size_bytes 1 --max_insert_threads 16 --query "INSERT INTO r1 SELECT * FROM numbers_mt(${SCALE})"
# Now wait for cleanup thread
for _ in {1..60}; do
$CLICKHOUSE_CLIENT --query "SYSTEM FLUSH LOGS"
[[ $($CLICKHOUSE_CLIENT --query "SELECT sum(toUInt32(extract(message, 'Removed (\d+) old log entries'))) FROM system.text_log WHERE event_date >= yesterday() AND logger_name LIKE '%' || '$CLICKHOUSE_DATABASE' || '%r1%(ReplicatedMergeTreeCleanupThread)%' AND message LIKE '%Removed % old log entries%'") -gt $((SCALE - 10)) ]] && break;

View File

@ -1,5 +1,5 @@
#!/usr/bin/env bash
# Tags: long, no-debug, no-asan, no-tsan, no-msan, no-ubsan, no-sanitize-coverage
# Tags: long, no-debug, no-asan, no-tsan, no-msan, no-ubsan, no-sanitize-coverage, no-distributed-cache
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh

View File

@ -1,4 +1,14 @@
TESTING THE FILE HIVE PARTITIONING
last Elizabeth
Frank Elizabeth
Moreno Elizabeth
Guzman Elizabeth
Stephens Elizabeth
Franklin Elizabeth
Gibson Elizabeth
Greer Elizabeth
Delgado Elizabeth
Cross Elizabeth
first last Elizabeth
Jorge Frank Elizabeth
Hunter Moreno Elizabeth
@ -9,56 +19,36 @@ Stanley Gibson Elizabeth
Eugenia Greer Elizabeth
Jeffery Delgado Elizabeth
Clara Cross Elizabeth
Elizabeth Gordon Elizabeth
Eva Schmidt Elizabeth Schmidt
Samuel Schmidt Elizabeth Schmidt
Eva Schmidt Elizabeth
Samuel Schmidt Elizabeth
Elizabeth Gordon Elizabeth Gordon
Elizabeth Gordon Elizabeth
Elizabeth Gordon Elizabeth Gordon
Elizabeth Gordon Elizabeth
first last Elizabeth
Jorge Frank Elizabeth
Hunter Moreno Elizabeth
Esther Guzman Elizabeth
Dennis Stephens Elizabeth
Nettie Franklin Elizabeth
Stanley Gibson Elizabeth
Eugenia Greer Elizabeth
Jeffery Delgado Elizabeth
Clara Cross Elizabeth
Elizabeth Gordon Elizabeth
last Elizabeth
Frank Elizabeth
Moreno Elizabeth
Guzman Elizabeth
Stephens Elizabeth
Franklin Elizabeth
Gibson Elizabeth
Greer Elizabeth
Delgado Elizabeth
Cross Elizabeth
42 2020-01-01
[1,2,3] 42.42
Array(Int64) LowCardinality(Float64)
101
2070
4081
2070
2070
b
1
1
TESTING THE URL PARTITIONING
first last Elizabeth
Jorge Frank Elizabeth
Hunter Moreno Elizabeth
Esther Guzman Elizabeth
Dennis Stephens Elizabeth
Nettie Franklin Elizabeth
Stanley Gibson Elizabeth
Eugenia Greer Elizabeth
Jeffery Delgado Elizabeth
Clara Cross Elizabeth
Elizabeth Gordon Elizabeth
Eva Schmidt Elizabeth Schmidt
Samuel Schmidt Elizabeth Schmidt
Eva Schmidt Elizabeth
Samuel Schmidt Elizabeth
Elizabeth Gordon Elizabeth Gordon
Elizabeth Gordon Elizabeth
Elizabeth Gordon Elizabeth Gordon
Elizabeth Gordon Elizabeth
last Elizabeth
Frank Elizabeth
Moreno Elizabeth
Guzman Elizabeth
Stephens Elizabeth
Franklin Elizabeth
Gibson Elizabeth
Greer Elizabeth
Delgado Elizabeth
Cross Elizabeth
first last Elizabeth
Jorge Frank Elizabeth
Hunter Moreno Elizabeth
@ -71,6 +61,16 @@ Jeffery Delgado Elizabeth
Clara Cross Elizabeth
1
TESTING THE S3 PARTITIONING
last Elizabeth
Frank Elizabeth
Moreno Elizabeth
Guzman Elizabeth
Stephens Elizabeth
Franklin Elizabeth
Gibson Elizabeth
Greer Elizabeth
Delgado Elizabeth
Cross Elizabeth
first last Elizabeth
Jorge Frank Elizabeth
Hunter Moreno Elizabeth
@ -81,40 +81,35 @@ Stanley Gibson Elizabeth
Eugenia Greer Elizabeth
Jeffery Delgado Elizabeth
Clara Cross Elizabeth
Elizabeth Gordon Elizabeth
Eva Schmidt Elizabeth Schmidt
Samuel Schmidt Elizabeth Schmidt
Eva Schmidt Elizabeth
Samuel Schmidt Elizabeth
Elizabeth Gordon Elizabeth Gordon
Elizabeth Gordon Elizabeth
Elizabeth Gordon Elizabeth Gordon
Elizabeth Gordon Elizabeth
first last Elizabeth
Jorge Frank Elizabeth
Hunter Moreno Elizabeth
Esther Guzman Elizabeth
Dennis Stephens Elizabeth
Nettie Franklin Elizabeth
Stanley Gibson Elizabeth
Eugenia Greer Elizabeth
Jeffery Delgado Elizabeth
Clara Cross Elizabeth
Elizabeth Gordon Elizabeth
last Elizabeth
Frank Elizabeth
Moreno Elizabeth
Guzman Elizabeth
Stephens Elizabeth
Franklin Elizabeth
Gibson Elizabeth
Greer Elizabeth
Delgado Elizabeth
Cross Elizabeth
OK
TESTING THE S3CLUSTER PARTITIONING
first last Elizabeth
Jorge Frank Elizabeth
Hunter Moreno Elizabeth
Esther Guzman Elizabeth
Dennis Stephens Elizabeth
Nettie Franklin Elizabeth
Stanley Gibson Elizabeth
Eugenia Greer Elizabeth
Jeffery Delgado Elizabeth
Clara Cross Elizabeth
Elizabeth Gordon Elizabeth
Eva Schmidt Elizabeth Schmidt
Samuel Schmidt Elizabeth Schmidt
Eva Schmidt Elizabeth
Samuel Schmidt Elizabeth
last Elizabeth
Frank Elizabeth
Moreno Elizabeth
Guzman Elizabeth
Stephens Elizabeth
Franklin Elizabeth
Gibson Elizabeth
Greer Elizabeth
Delgado Elizabeth
Cross Elizabeth
last Elizabeth
Frank Elizabeth
Moreno Elizabeth
Guzman Elizabeth
Stephens Elizabeth
Franklin Elizabeth
Gibson Elizabeth
Greer Elizabeth
Delgado Elizabeth
Cross Elizabeth

View File

@ -11,48 +11,34 @@ $CLICKHOUSE_LOCAL -q "SELECT 'TESTING THE FILE HIVE PARTITIONING'"
$CLICKHOUSE_LOCAL -n -q """
set use_hive_partitioning = 1;
SELECT *, _column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/sample.parquet') LIMIT 10;
SELECT *, column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/sample.parquet') LIMIT 10;
SELECT *, _column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/sample.parquet') WHERE column0 = _column0;
SELECT *, non_existing_column FROM file('$CURDIR/data_hive/partitioning/non_existing_column=Elizabeth/sample.parquet') LIMIT 10;
SELECT *, column0 FROM file('$CURDIR/data_hive/partitioning/column0=*/sample.parquet') WHERE column0 = 'Elizabeth' LIMIT 10;
SELECT *, _column0, _column1 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column1 = _column1;
SELECT *, _column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column1 = _column1;
SELECT *, _column0, _column1 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column0 = _column0 AND column1 = _column1;
SELECT *, _column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column0 = _column0 AND column1 = _column1;
SELECT *, _column0, _column1 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column1 = _column1;
SELECT *, _column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column1 = _column1;
SELECT *, _column0, _column1 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column0 = _column0 AND column1 = _column1;
SELECT *, _column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column0 = _column0 AND column1 = _column1;
SELECT *, _non_existing_column FROM file('$CURDIR/data_hive/partitioning/non_existing_column=Elizabeth/sample.parquet') LIMIT 10;
SELECT *, _column0 FROM file('$CURDIR/data_hive/partitioning/column0=*/sample.parquet') WHERE column0 = _column0;
SELECT _number, _date FROM file('$CURDIR/data_hive/partitioning/number=42/date=2020-01-01/sample.parquet') LIMIT 1;
SELECT _array, _float FROM file('$CURDIR/data_hive/partitioning/array=[1,2,3]/float=42.42/sample.parquet') LIMIT 1;
SELECT toTypeName(_array), toTypeName(_float) FROM file('$CURDIR/data_hive/partitioning/array=[1,2,3]/float=42.42/sample.parquet') LIMIT 1;
SELECT count(*) FROM file('$CURDIR/data_hive/partitioning/number=42/date=2020-01-01/sample.parquet') WHERE _number = 42;
SELECT number, date FROM file('$CURDIR/data_hive/partitioning/number=42/date=2020-01-01/sample.parquet') LIMIT 1;
SELECT array, float FROM file('$CURDIR/data_hive/partitioning/array=[1,2,3]/float=42.42/sample.parquet') LIMIT 1;
SELECT toTypeName(array), toTypeName(float) FROM file('$CURDIR/data_hive/partitioning/array=[1,2,3]/float=42.42/sample.parquet') LIMIT 1;
SELECT count(*) FROM file('$CURDIR/data_hive/partitioning/number=42/date=2020-01-01/sample.parquet') WHERE number = 42;
"""
$CLICKHOUSE_LOCAL -n -q """
set use_hive_partitioning = 1;
SELECT _identifier FROM file('$CURDIR/data_hive/partitioning/identifier=*/email.csv') LIMIT 2;
SELECT __identifier FROM file('$CURDIR/data_hive/partitioning/identifier=*/email.csv') LIMIT 2;
SELECT identifier FROM file('$CURDIR/data_hive/partitioning/identifier=*/email.csv') LIMIT 2;
SELECT a FROM file('$CURDIR/data_hive/partitioning/a=b/a=b/sample.parquet') LIMIT 1;
"""
$CLICKHOUSE_LOCAL -n -q """
set use_hive_partitioning = 1;
SELECT *, _column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/column0=Elizabeth/sample.parquet') LIMIT 10;
SELECT *, column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/column0=Elizabeth1/sample.parquet') LIMIT 10;
""" 2>&1 | grep -c "INCORRECT_DATA"
$CLICKHOUSE_LOCAL -n -q """
set use_hive_partitioning = 0;
SELECT *, _column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/sample.parquet') LIMIT 10;
SELECT *, non_existing_column FROM file('$CURDIR/data_hive/partitioning/non_existing_column=Elizabeth/sample.parquet') LIMIT 10;
""" 2>&1 | grep -c "UNKNOWN_IDENTIFIER"
@ -62,23 +48,9 @@ $CLICKHOUSE_LOCAL -q "SELECT 'TESTING THE URL PARTITIONING'"
$CLICKHOUSE_LOCAL -n -q """
set use_hive_partitioning = 1;
SELECT *, _column0 FROM url('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/sample.parquet') LIMIT 10;
SELECT *, column0 FROM url('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/sample.parquet') LIMIT 10;
SELECT *, _column0 FROM url('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/sample.parquet') WHERE column0 = _column0;
SELECT *, _column0, _column1 FROM url('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column1 = _column1;
SELECT *, _column0 FROM url('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column1 = _column1;
SELECT *, _column0, _column1 FROM url('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column0 = _column0 AND column1 = _column1;
SELECT *, _column0 FROM url('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column0 = _column0 AND column1 = _column1;
SELECT *, _column0, _column1 FROM url('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column1 = _column1;
SELECT *, _column0 FROM url('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column1 = _column1;
SELECT *, _column0, _column1 FROM url('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column0 = _column0 AND column1 = _column1;
SELECT *, _column0 FROM url('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column0 = _column0 AND column1 = _column1;
SELECT *, _non_existing_column FROM url('http://localhost:11111/test/hive_partitioning/non_existing_column=Elizabeth/sample.parquet') LIMIT 10;"""
SELECT *, non_existing_column FROM url('http://localhost:11111/test/hive_partitioning/non_existing_column=Elizabeth/sample.parquet') LIMIT 10;"""
$CLICKHOUSE_LOCAL -n -q """
set use_hive_partitioning = 0;
@ -93,24 +65,10 @@ $CLICKHOUSE_LOCAL -q "SELECT 'TESTING THE S3 PARTITIONING'"
$CLICKHOUSE_CLIENT -n -q """
set use_hive_partitioning = 1;
SELECT *, _column0 FROM s3('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/sample.parquet') LIMIT 10;
SELECT *, column0 FROM s3('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/sample.parquet') LIMIT 10;
SELECT *, _column0 FROM s3('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/sample.parquet') WHERE column0 = _column0;
SELECT *, _column0, _column1 FROM s3('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column1 = _column1;
SELECT *, _column0 FROM s3('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column1 = _column1;
SELECT *, _column0, _column1 FROM s3('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column0 = _column0 AND column1 = _column1;
SELECT *, _column0 FROM s3('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column0 = _column0 AND column1 = _column1;
SELECT *, _column0, _column1 FROM s3('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column1 = _column1;
SELECT *, _column0 FROM s3('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column1 = _column1;
SELECT *, _column0, _column1 FROM s3('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column0 = _column0 AND column1 = _column1;
SELECT *, _column0 FROM s3('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column0 = _column0 AND column1 = _column1;
SELECT *, _non_existing_column FROM s3('http://localhost:11111/test/hive_partitioning/non_existing_column=Elizabeth/sample.parquet') LIMIT 10;
SELECT *, _column0 FROM s3('http://localhost:11111/test/hive_partitioning/column0=*/sample.parquet') WHERE column0 = _column0;
SELECT *, non_existing_column FROM s3('http://localhost:11111/test/hive_partitioning/non_existing_column=Elizabeth/sample.parquet') LIMIT 10;
SELECT *, column0 FROM s3('http://localhost:11111/test/hive_partitioning/column0=*/sample.parquet') WHERE column0 = 'Elizabeth' LIMIT 10;
"""
$CLICKHOUSE_CLIENT -n -q """
@ -124,13 +82,7 @@ $CLICKHOUSE_LOCAL -q "SELECT 'TESTING THE S3CLUSTER PARTITIONING'"
$CLICKHOUSE_CLIENT -n -q """
set use_hive_partitioning = 1;
SELECT *, _column0 FROM s3Cluster(test_cluster_one_shard_three_replicas_localhost, 'http://localhost:11111/test/hive_partitioning/column0=Elizabeth/sample.parquet') LIMIT 10;
SELECT *, column0 FROM s3Cluster(test_cluster_one_shard_three_replicas_localhost, 'http://localhost:11111/test/hive_partitioning/column0=Elizabeth/sample.parquet') LIMIT 10;
SELECT *, _column0 FROM s3Cluster(test_cluster_one_shard_three_replicas_localhost, 'http://localhost:11111/test/hive_partitioning/column0=Elizabeth/sample.parquet') WHERE column0 = _column0;
SELECT *, _column0, _column1 FROM s3Cluster(test_cluster_one_shard_three_replicas_localhost, 'http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column1 = _column1;
SELECT *, _column0 FROM s3Cluster(test_cluster_one_shard_three_replicas_localhost, 'http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column1 = _column1;
SELECT *, _column0, _column1 FROM s3Cluster(test_cluster_one_shard_three_replicas_localhost, 'http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column0 = _column0 AND column1 = _column1;
SELECT *, _column0 FROM s3Cluster(test_cluster_one_shard_three_replicas_localhost, 'http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column0 = _column0 AND column1 = _column1;
SELECT *, column0 FROM s3Cluster(test_cluster_one_shard_three_replicas_localhost, 'http://localhost:11111/test/hive_partitioning/column0=Elizabeth/sample.parquet') WHERE column0 = 'Elizabeth' LIMIT 10;
"""