diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 655cc064fea..55bc8083ec8 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -356,18 +356,14 @@ Strings StorageFile::getPathsList(const String & table_path, const String & user { fs::path user_files_absolute_path = fs::weakly_canonical(user_files_path); fs::path fs_table_path(table_path); - LOG_TRACE(getLogger("testing the paths"), "{} , {}", user_files_absolute_path, fs_table_path); if (fs_table_path.is_relative()) fs_table_path = user_files_absolute_path / fs_table_path; - LOG_TRACE(getLogger("testing the paths"), "fs_table_path = {}", fs_table_path); - Strings paths; /// Do not use fs::canonical or fs::weakly_canonical. /// Otherwise it will not allow to work with symlinks in `user_files_path` directory. String path = fs::absolute(fs_table_path).lexically_normal(); /// Normalize path. - LOG_TRACE(getLogger("testing the paths"), "path = {}", path); bool can_be_directory = true; if (path.find(PartitionedSink::PARTITION_ID_WILDCARD) != std::string::npos) @@ -400,10 +396,7 @@ Strings StorageFile::getPathsList(const String & table_path, const String & user } for (const auto & cur_path : paths) - { checkCreationIsAllowed(context, user_files_absolute_path, cur_path, can_be_directory); - LOG_TRACE(getLogger("checking all paths"), "{}", cur_path); - } return paths; } diff --git a/src/Storages/VirtualColumnUtils.cpp b/src/Storages/VirtualColumnUtils.cpp index 2bd7325a789..523f236bf59 100644 --- a/src/Storages/VirtualColumnUtils.cpp +++ b/src/Storages/VirtualColumnUtils.cpp @@ -207,7 +207,7 @@ VirtualColumnsDescription getVirtualsForFileLikeStorage(ColumnsDescription & sto return desc; } -static void addFilterDataToVirtualColumns(Block & block, const String & path, size_t idx, ColumnsWithTypeAndName partitioning_keys, const ContextPtr & context) +static void addFilterDataToVirtualColumns(Block & block, const String & path, size_t idx, const NamesAndTypesList & virtual_columns, const ContextPtr & context) { if (block.has("_path")) block.getByName("_path").column->assumeMutableRef().insert(path); @@ -224,13 +224,20 @@ static void addFilterDataToVirtualColumns(Block & block, const String & path, si block.getByName("_file").column->assumeMutableRef().insert(file); } - for (const auto & item : partitioning_keys) + std::unordered_map keys; + if (context->getSettingsRef().use_hive_partitioning) + keys = parseHivePartitioningKeysAndValues(path); + + for (const auto & virt_column : virtual_columns) { - if (block.has(item.name)) + auto it = keys.find(virt_column.name); + if (it != keys.end()) { - auto column = block.getByName(item.name).column; - ReadBufferFromString buf(item.column->getDataAt(0).toView()); - item.type->getDefaultSerialization()->deserializeWholeText(column->assumeMutableRef(), buf, getFormatSettings(context)); + if (!block.has(virt_column.name)) + block.insert({virt_column.type->createColumn(), virt_column.type, virt_column.name}); + auto & column = block.getByName(virt_column.name).column; + ReadBufferFromString buf(it->second); + virt_column.type->getDefaultSerialization()->deserializeWholeText(column->assumeMutableRef(), buf, getFormatSettings(context)); } } @@ -259,28 +266,15 @@ std::optional createPathAndFileFilterDAG(const ActionsDAG::Node * pr ColumnPtr getFilterByPathAndFileIndexes(const std::vector & paths, const ExpressionActionsPtr & actions, const NamesAndTypesList & virtual_columns, const ContextPtr & context) { Block block; - std::unordered_map keys; - ColumnsWithTypeAndName partitioning_columns; - if (context->getSettingsRef().use_hive_partitioning) - keys = parseHivePartitioningKeysAndValues(paths[0]); for (const auto & column : virtual_columns) { if (column.name == "_file" || column.name == "_path") block.insert({column.type->createColumn(), column.type, column.name}); - - auto it = keys.find(column.name); - if (it != keys.end()) - { - auto string_column = std::make_shared()->createColumn(); - string_column->insert(it->second); - block.insert({column.type->createColumn(), column.type, column.name}); - partitioning_columns.push_back({string_column->getPtr(), column.type, column.name}); - } } block.insert({ColumnUInt64::create(), std::make_shared(), "_idx"}); for (size_t i = 0; i != paths.size(); ++i) - addFilterDataToVirtualColumns(block, paths[i], i, partitioning_columns, context); + addFilterDataToVirtualColumns(block, paths[i], i, virtual_columns, context); filterBlockWithExpression(actions, block); diff --git a/tests/queries/0_stateless/03231_hive_partitioning_filtering.sh b/tests/queries/0_stateless/03231_hive_partitioning_filtering.sh index a561758c726..b66d2971cac 100755 --- a/tests/queries/0_stateless/03231_hive_partitioning_filtering.sh +++ b/tests/queries/0_stateless/03231_hive_partitioning_filtering.sh @@ -11,6 +11,9 @@ cp -r $CURDIR/data_hive/ $DATA_DIR $CLICKHOUSE_CLIENT --query_id="test_03231_1_$CLICKHOUSE_TEST_UNIQUE_NAME" --query " SELECT countDistinct(_path) FROM file('$DATA_DIR/data_hive/partitioning/column0=*/sample.parquet') WHERE column0 = 'Elizabeth' SETTINGS use_hive_partitioning=1; +" + +$CLICKHOUSE_CLIENT --query " SYSTEM FLUSH LOGS; " @@ -20,6 +23,9 @@ $CLICKHOUSE_CLIENT --query " $CLICKHOUSE_CLIENT --query_id="test_03231_2_$CLICKHOUSE_TEST_UNIQUE_NAME" --query " SELECT countDistinct(_path) FROM file('$DATA_DIR/data_hive/partitioning/identifier=*/email.csv') WHERE identifier = 2070 SETTINGS use_hive_partitioning=1; +" + +$CLICKHOUSE_CLIENT --query " SYSTEM FLUSH LOGS; " @@ -29,6 +35,9 @@ $CLICKHOUSE_CLIENT --query " $CLICKHOUSE_CLIENT --query_id="test_03231_3_$CLICKHOUSE_TEST_UNIQUE_NAME" --query " SELECT countDistinct(_path) FROM file('$DATA_DIR/data_hive/partitioning/array=*/sample.parquet') WHERE array = [1,2,3] SETTINGS use_hive_partitioning=1; +" + +$CLICKHOUSE_CLIENT --query " SYSTEM FLUSH LOGS; "