review + testing the code

This commit is contained in:
yariks5s 2024-08-28 17:22:47 +00:00
parent 60c6eb2610
commit dc97bd6b92
7 changed files with 48 additions and 26 deletions

View File

@ -151,6 +151,15 @@ Names NamesAndTypesList::getNames() const
return res;
}
/// Returns a NameSet containing the name of every column stored in this list.
NameSet NamesAndTypesList::getNameSet() const
{
    NameSet column_names;

    /// Pre-size the container for one entry per column before filling it.
    column_names.reserve(size());

    for (const auto & name_and_type : *this)
    {
        column_names.insert(name_and_type.name);
    }

    return column_names;
}
DataTypes NamesAndTypesList::getTypes() const
{
DataTypes res;

View File

@ -100,6 +100,7 @@ public:
void getDifference(const NamesAndTypesList & rhs, NamesAndTypesList & deleted, NamesAndTypesList & added) const;
Names getNames() const;
NameSet getNameSet() const;
DataTypes getTypes() const;
/// Remove columns whose names are not in `names`.

View File

@ -131,11 +131,10 @@ std::shared_ptr<StorageObjectStorageSource::IIterator> StorageObjectStorageSourc
else
{
ConfigurationPtr copy_configuration = configuration->clone();
auto keys = configuration->getPaths();
String partitioning_path = fs::path(configuration->getNamespace()) / keys[0];
auto filter_dag = VirtualColumnUtils::createPathAndFileFilterDAG(predicate, virtual_columns, partitioning_path, local_context);
auto filter_dag = VirtualColumnUtils::createPathAndFileFilterDAG(predicate, virtual_columns, local_context);
if (filter_dag)
{
auto keys = configuration->getPaths();
std::vector<String> paths;
paths.reserve(keys.size());
for (const auto & key : keys)
@ -515,7 +514,7 @@ StorageObjectStorageSource::GlobIterator::GlobIterator(
}
recursive = key_with_globs == "/**";
if (auto filter_dag = VirtualColumnUtils::createPathAndFileFilterDAG(predicate, virtual_columns, key_with_globs, local_context))
if (auto filter_dag = VirtualColumnUtils::createPathAndFileFilterDAG(predicate, virtual_columns, local_context))
{
VirtualColumnUtils::buildSetsForDAG(*filter_dag, getContext());
filter_expr = std::make_shared<ExpressionActions>(std::move(*filter_dag));

View File

@ -355,14 +355,18 @@ Strings StorageFile::getPathsList(const String & table_path, const String & user
{
fs::path user_files_absolute_path = fs::weakly_canonical(user_files_path);
fs::path fs_table_path(table_path);
LOG_TRACE(getLogger("testing the paths"), "{} , {}", user_files_absolute_path, fs_table_path);
if (fs_table_path.is_relative())
fs_table_path = user_files_absolute_path / fs_table_path;
LOG_TRACE(getLogger("testing the paths"), "fs_table_path = {}", fs_table_path);
Strings paths;
/// Do not use fs::canonical or fs::weakly_canonical.
/// Otherwise it would not be possible to work with symlinks in the `user_files_path` directory.
String path = fs::absolute(fs_table_path).lexically_normal(); /// Normalize path.
LOG_TRACE(getLogger("testing the paths"), "path = {}", path);
bool can_be_directory = true;
if (path.find(PartitionedSink::PARTITION_ID_WILDCARD) != std::string::npos)
@ -395,7 +399,10 @@ Strings StorageFile::getPathsList(const String & table_path, const String & user
}
for (const auto & cur_path : paths)
{
checkCreationIsAllowed(context, user_files_absolute_path, cur_path, can_be_directory);
LOG_TRACE(getLogger("checking all paths"), "{}", cur_path);
}
return paths;
}
@ -1140,7 +1147,7 @@ StorageFileSource::FilesIterator::FilesIterator(
{
std::optional<ActionsDAG> filter_dag;
if (!distributed_processing && !archive_info && !files.empty())
filter_dag = VirtualColumnUtils::createPathAndFileFilterDAG(predicate, virtual_columns, files[0], context_);
filter_dag = VirtualColumnUtils::createPathAndFileFilterDAG(predicate, virtual_columns, context_);
if (filter_dag)
{

View File

@ -214,10 +214,7 @@ public:
std::optional<ActionsDAG> filter_dag;
if (!uris.empty())
{
String partitioning_path = Poco::URI(uris[0]).getPath();
filter_dag = VirtualColumnUtils::createPathAndFileFilterDAG(predicate, virtual_columns, partitioning_path, context);
}
filter_dag = VirtualColumnUtils::createPathAndFileFilterDAG(predicate, virtual_columns, context);
if (filter_dag)
{

View File

@ -1,5 +1,6 @@
#include <memory>
#include <stack>
#include <unordered_set>
#include <Core/NamesAndTypes.h>
#include <Core/TypeId.h>
@ -125,9 +126,18 @@ void filterBlockWithExpression(const ExpressionActionsPtr & actions, Block & blo
}
}
/// Builds the list of common virtual columns for file-like storages:
/// `_path`, `_file`, `_size`, `_time` and `_etag`, each paired with its data type.
NamesAndTypesList getCommonVirtualsForFileLikeStorage()
{
    /// Factory for a LowCardinality-wrapped String type; every call creates a fresh type object,
    /// so the columns do not share a single type instance.
    const auto make_low_cardinality_string = []
    {
        return std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>());
    };

    return {
        {"_path", make_low_cardinality_string()},
        {"_file", make_low_cardinality_string()},
        {"_size", makeNullable(std::make_shared<DataTypeUInt64>())},
        {"_time", makeNullable(std::make_shared<DataTypeDateTime>())},
        {"_etag", make_low_cardinality_string()},
    };
}
/// Returns the names of the common virtual columns for file-like storages.
/// The names are derived from getCommonVirtualsForFileLikeStorage() instead of a separate
/// hard-coded literal, so the name set and the typed column list cannot drift apart.
NameSet getVirtualNamesForFileLikeStorage()
{
    return getCommonVirtualsForFileLikeStorage().getNameSet();
}
std::unordered_map<std::string, std::string> parseHivePartitioningKeysAndValues(const String & path)
@ -155,8 +165,10 @@ VirtualColumnsDescription getVirtualsForFileLikeStorage(ColumnsDescription & sto
{
VirtualColumnsDescription desc;
auto add_virtual = [&](const auto & name, const auto & type)
auto add_virtual = [&](const NameAndTypePair & pair)
{
const auto & name = pair.getNameInStorage();
const auto & type = pair.getTypeInStorage();
if (storage_columns.has(name))
{
if (!context->getSettingsRef().use_hive_partitioning)
@ -173,11 +185,8 @@ VirtualColumnsDescription getVirtualsForFileLikeStorage(ColumnsDescription & sto
desc.addEphemeral(name, type, "");
};
add_virtual("_path", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>()));
add_virtual("_file", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>()));
add_virtual("_size", makeNullable(std::make_shared<DataTypeUInt64>()));
add_virtual("_time", makeNullable(std::make_shared<DataTypeDateTime>()));
add_virtual("_etag", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>()));
for (const auto & item : getCommonVirtualsForFileLikeStorage())
add_virtual(item);
if (context->getSettingsRef().use_hive_partitioning)
{
@ -189,9 +198,9 @@ VirtualColumnsDescription getVirtualsForFileLikeStorage(ColumnsDescription & sto
if (type == nullptr)
type = std::make_shared<DataTypeString>();
if (type->canBeInsideLowCardinality())
add_virtual(item.first, std::make_shared<DataTypeLowCardinality>(type));
add_virtual({item.first, std::make_shared<DataTypeLowCardinality>(type)});
else
add_virtual(item.first, type);
add_virtual({item.first, type});
}
}
@ -228,18 +237,18 @@ static void addFilterDataToVirtualColumns(Block & block, const String & path, si
block.getByName("_idx").column->assumeMutableRef().insert(idx);
}
std::optional<ActionsDAG> createPathAndFileFilterDAG(const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns, const String & path, const ContextPtr & context)
std::optional<ActionsDAG> createPathAndFileFilterDAG(const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns, const ContextPtr & context)
{
if (!predicate || virtual_columns.empty())
return {};
Block block;
std::unordered_map<std::string, std::string> keys;
NameSet common_virtuals;
if (context->getSettingsRef().use_hive_partitioning)
keys = parseHivePartitioningKeysAndValues(path);
common_virtuals = getVirtualNamesForFileLikeStorage();
for (const auto & column : virtual_columns)
{
if (column.name == "_file" || column.name == "_path" || keys.contains(column.name))
if (column.name == "_file" || column.name == "_path" || !common_virtuals.contains(column.name))
block.insert({column.type->createColumn(), column.type, column.name});
}
@ -262,10 +271,10 @@ ColumnPtr getFilterByPathAndFileIndexes(const std::vector<String> & paths, const
auto it = keys.find(column.name);
if (it != keys.end())
{
auto c = std::make_shared<DataTypeString>()->createColumn();
c->insert(it->second);
auto string_column = std::make_shared<DataTypeString>()->createColumn();
string_column->insert(it->second);
block.insert({column.type->createColumn(), column.type, column.name});
partitioning_columns.push_back({c->getPtr(), column.type, column.name});
partitioning_columns.push_back({string_column->getPtr(), column.type, column.name});
}
}
block.insert({ColumnUInt64::create(), std::make_shared<DataTypeUInt64>(), "_idx"});

View File

@ -75,7 +75,7 @@ VirtualColumnsDescription getVirtualsForFileLikeStorage(
const std::string & sample_path = "",
std::optional<FormatSettings> format_settings_ = std::nullopt);
std::optional<ActionsDAG> createPathAndFileFilterDAG(const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns, const String & path, const ContextPtr & context);
std::optional<ActionsDAG> createPathAndFileFilterDAG(const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns, const ContextPtr & context);
ColumnPtr getFilterByPathAndFileIndexes(const std::vector<String> & paths, const ExpressionActionsPtr & actions, const NamesAndTypesList & virtual_columns, const ContextPtr & context);