Remove logging and fix bug

This commit is contained in:
Yarik Briukhovetskyi 2024-09-06 20:24:18 +02:00 committed by GitHub
parent 2fa6be55ff
commit a903e1a726
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 23 additions and 27 deletions

View File

@ -356,18 +356,14 @@ Strings StorageFile::getPathsList(const String & table_path, const String & user
{
fs::path user_files_absolute_path = fs::weakly_canonical(user_files_path);
fs::path fs_table_path(table_path);
LOG_TRACE(getLogger("testing the paths"), "{} , {}", user_files_absolute_path, fs_table_path);
if (fs_table_path.is_relative())
fs_table_path = user_files_absolute_path / fs_table_path;
LOG_TRACE(getLogger("testing the paths"), "fs_table_path = {}", fs_table_path);
Strings paths;
/// Do not use fs::canonical or fs::weakly_canonical.
/// Otherwise it will not allow to work with symlinks in `user_files_path` directory.
String path = fs::absolute(fs_table_path).lexically_normal(); /// Normalize path.
LOG_TRACE(getLogger("testing the paths"), "path = {}", path);
bool can_be_directory = true;
if (path.find(PartitionedSink::PARTITION_ID_WILDCARD) != std::string::npos)
@ -400,10 +396,7 @@ Strings StorageFile::getPathsList(const String & table_path, const String & user
}
for (const auto & cur_path : paths)
{
checkCreationIsAllowed(context, user_files_absolute_path, cur_path, can_be_directory);
LOG_TRACE(getLogger("checking all paths"), "{}", cur_path);
}
return paths;
}

View File

@ -207,7 +207,7 @@ VirtualColumnsDescription getVirtualsForFileLikeStorage(ColumnsDescription & sto
return desc;
}
static void addFilterDataToVirtualColumns(Block & block, const String & path, size_t idx, ColumnsWithTypeAndName partitioning_keys, const ContextPtr & context)
static void addFilterDataToVirtualColumns(Block & block, const String & path, size_t idx, const NamesAndTypesList & virtual_columns, const ContextPtr & context)
{
if (block.has("_path"))
block.getByName("_path").column->assumeMutableRef().insert(path);
@ -224,13 +224,20 @@ static void addFilterDataToVirtualColumns(Block & block, const String & path, si
block.getByName("_file").column->assumeMutableRef().insert(file);
}
for (const auto & item : partitioning_keys)
std::unordered_map<std::string, std::string> keys;
if (context->getSettingsRef().use_hive_partitioning)
keys = parseHivePartitioningKeysAndValues(path);
for (const auto & virt_column : virtual_columns)
{
if (block.has(item.name))
auto it = keys.find(virt_column.name);
if (it != keys.end())
{
auto column = block.getByName(item.name).column;
ReadBufferFromString buf(item.column->getDataAt(0).toView());
item.type->getDefaultSerialization()->deserializeWholeText(column->assumeMutableRef(), buf, getFormatSettings(context));
if (!block.has(virt_column.name))
block.insert({virt_column.type->createColumn(), virt_column.type, virt_column.name});
auto & column = block.getByName(virt_column.name).column;
ReadBufferFromString buf(it->second);
virt_column.type->getDefaultSerialization()->deserializeWholeText(column->assumeMutableRef(), buf, getFormatSettings(context));
}
}
@ -259,28 +266,15 @@ std::optional<ActionsDAG> createPathAndFileFilterDAG(const ActionsDAG::Node * pr
ColumnPtr getFilterByPathAndFileIndexes(const std::vector<String> & paths, const ExpressionActionsPtr & actions, const NamesAndTypesList & virtual_columns, const ContextPtr & context)
{
Block block;
std::unordered_map<std::string, std::string> keys;
ColumnsWithTypeAndName partitioning_columns;
if (context->getSettingsRef().use_hive_partitioning)
keys = parseHivePartitioningKeysAndValues(paths[0]);
for (const auto & column : virtual_columns)
{
if (column.name == "_file" || column.name == "_path")
block.insert({column.type->createColumn(), column.type, column.name});
auto it = keys.find(column.name);
if (it != keys.end())
{
auto string_column = std::make_shared<DataTypeString>()->createColumn();
string_column->insert(it->second);
block.insert({column.type->createColumn(), column.type, column.name});
partitioning_columns.push_back({string_column->getPtr(), column.type, column.name});
}
}
block.insert({ColumnUInt64::create(), std::make_shared<DataTypeUInt64>(), "_idx"});
for (size_t i = 0; i != paths.size(); ++i)
addFilterDataToVirtualColumns(block, paths[i], i, partitioning_columns, context);
addFilterDataToVirtualColumns(block, paths[i], i, virtual_columns, context);
filterBlockWithExpression(actions, block);

View File

@ -11,6 +11,9 @@ cp -r $CURDIR/data_hive/ $DATA_DIR
$CLICKHOUSE_CLIENT --query_id="test_03231_1_$CLICKHOUSE_TEST_UNIQUE_NAME" --query "
SELECT countDistinct(_path) FROM file('$DATA_DIR/data_hive/partitioning/column0=*/sample.parquet') WHERE column0 = 'Elizabeth' SETTINGS use_hive_partitioning=1;
"
$CLICKHOUSE_CLIENT --query "
SYSTEM FLUSH LOGS;
"
@ -20,6 +23,9 @@ $CLICKHOUSE_CLIENT --query "
$CLICKHOUSE_CLIENT --query_id="test_03231_2_$CLICKHOUSE_TEST_UNIQUE_NAME" --query "
SELECT countDistinct(_path) FROM file('$DATA_DIR/data_hive/partitioning/identifier=*/email.csv') WHERE identifier = 2070 SETTINGS use_hive_partitioning=1;
"
$CLICKHOUSE_CLIENT --query "
SYSTEM FLUSH LOGS;
"
@ -29,6 +35,9 @@ $CLICKHOUSE_CLIENT --query "
$CLICKHOUSE_CLIENT --query_id="test_03231_3_$CLICKHOUSE_TEST_UNIQUE_NAME" --query "
SELECT countDistinct(_path) FROM file('$DATA_DIR/data_hive/partitioning/array=*/sample.parquet') WHERE array = [1,2,3] SETTINGS use_hive_partitioning=1;
"
$CLICKHOUSE_CLIENT --query "
SYSTEM FLUSH LOGS;
"