mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-09-19 16:20:50 +00:00
review + testing the code
This commit is contained in:
parent
60c6eb2610
commit
dc97bd6b92
@ -151,6 +151,15 @@ Names NamesAndTypesList::getNames() const
|
||||
return res;
|
||||
}
|
||||
|
||||
/// Returns the names of all columns in this list as a set.
NameSet NamesAndTypesList::getNameSet() const
{
    NameSet names;
    /// One slot per column is an upper bound: repeated names collapse into a single entry.
    names.reserve(size());

    for (const auto & name_and_type : *this)
        names.insert(name_and_type.name);

    return names;
}
|
||||
|
||||
DataTypes NamesAndTypesList::getTypes() const
|
||||
{
|
||||
DataTypes res;
|
||||
|
@ -100,6 +100,7 @@ public:
|
||||
void getDifference(const NamesAndTypesList & rhs, NamesAndTypesList & deleted, NamesAndTypesList & added) const;
|
||||
|
||||
Names getNames() const;
|
||||
NameSet getNameSet() const;
|
||||
DataTypes getTypes() const;
|
||||
|
||||
/// Remove columns which names are not in the `names`.
|
||||
|
@ -131,11 +131,10 @@ std::shared_ptr<StorageObjectStorageSource::IIterator> StorageObjectStorageSourc
|
||||
else
|
||||
{
|
||||
ConfigurationPtr copy_configuration = configuration->clone();
|
||||
auto keys = configuration->getPaths();
|
||||
String partitioning_path = fs::path(configuration->getNamespace()) / keys[0];
|
||||
auto filter_dag = VirtualColumnUtils::createPathAndFileFilterDAG(predicate, virtual_columns, partitioning_path, local_context);
|
||||
auto filter_dag = VirtualColumnUtils::createPathAndFileFilterDAG(predicate, virtual_columns, local_context);
|
||||
if (filter_dag)
|
||||
{
|
||||
auto keys = configuration->getPaths();
|
||||
std::vector<String> paths;
|
||||
paths.reserve(keys.size());
|
||||
for (const auto & key : keys)
|
||||
@ -515,7 +514,7 @@ StorageObjectStorageSource::GlobIterator::GlobIterator(
|
||||
}
|
||||
|
||||
recursive = key_with_globs == "/**";
|
||||
if (auto filter_dag = VirtualColumnUtils::createPathAndFileFilterDAG(predicate, virtual_columns, key_with_globs, local_context))
|
||||
if (auto filter_dag = VirtualColumnUtils::createPathAndFileFilterDAG(predicate, virtual_columns, local_context))
|
||||
{
|
||||
VirtualColumnUtils::buildSetsForDAG(*filter_dag, getContext());
|
||||
filter_expr = std::make_shared<ExpressionActions>(std::move(*filter_dag));
|
||||
|
@ -355,14 +355,18 @@ Strings StorageFile::getPathsList(const String & table_path, const String & user
|
||||
{
|
||||
fs::path user_files_absolute_path = fs::weakly_canonical(user_files_path);
|
||||
fs::path fs_table_path(table_path);
|
||||
LOG_TRACE(getLogger("testing the paths"), "{} , {}", user_files_absolute_path, fs_table_path);
|
||||
if (fs_table_path.is_relative())
|
||||
fs_table_path = user_files_absolute_path / fs_table_path;
|
||||
|
||||
LOG_TRACE(getLogger("testing the paths"), "fs_table_path = {}", fs_table_path);
|
||||
|
||||
Strings paths;
|
||||
|
||||
/// Do not use fs::canonical or fs::weakly_canonical.
|
||||
/// Otherwise it will not allow to work with symlinks in `user_files_path` directory.
|
||||
String path = fs::absolute(fs_table_path).lexically_normal(); /// Normalize path.
|
||||
LOG_TRACE(getLogger("testing the paths"), "path = {}", path);
|
||||
bool can_be_directory = true;
|
||||
|
||||
if (path.find(PartitionedSink::PARTITION_ID_WILDCARD) != std::string::npos)
|
||||
@ -395,7 +399,10 @@ Strings StorageFile::getPathsList(const String & table_path, const String & user
|
||||
}
|
||||
|
||||
for (const auto & cur_path : paths)
|
||||
{
|
||||
checkCreationIsAllowed(context, user_files_absolute_path, cur_path, can_be_directory);
|
||||
LOG_TRACE(getLogger("checking all paths"), "{}", cur_path);
|
||||
}
|
||||
|
||||
return paths;
|
||||
}
|
||||
@ -1140,7 +1147,7 @@ StorageFileSource::FilesIterator::FilesIterator(
|
||||
{
|
||||
std::optional<ActionsDAG> filter_dag;
|
||||
if (!distributed_processing && !archive_info && !files.empty())
|
||||
filter_dag = VirtualColumnUtils::createPathAndFileFilterDAG(predicate, virtual_columns, files[0], context_);
|
||||
filter_dag = VirtualColumnUtils::createPathAndFileFilterDAG(predicate, virtual_columns, context_);
|
||||
|
||||
if (filter_dag)
|
||||
{
|
||||
|
@ -214,10 +214,7 @@ public:
|
||||
|
||||
std::optional<ActionsDAG> filter_dag;
|
||||
if (!uris.empty())
|
||||
{
|
||||
String partitioning_path = Poco::URI(uris[0]).getPath();
|
||||
filter_dag = VirtualColumnUtils::createPathAndFileFilterDAG(predicate, virtual_columns, partitioning_path, context);
|
||||
}
|
||||
filter_dag = VirtualColumnUtils::createPathAndFileFilterDAG(predicate, virtual_columns, context);
|
||||
|
||||
if (filter_dag)
|
||||
{
|
||||
|
@ -1,5 +1,6 @@
|
||||
#include <memory>
|
||||
#include <stack>
|
||||
#include <unordered_set>
|
||||
#include <Core/NamesAndTypes.h>
|
||||
#include <Core/TypeId.h>
|
||||
|
||||
@ -125,9 +126,18 @@ void filterBlockWithExpression(const ExpressionActionsPtr & actions, Block & blo
|
||||
}
|
||||
}
|
||||
|
||||
/// Virtual columns shared by the file-like storages, with their types:
///   _path, _file, _etag -- LowCardinality(String)
///   _size               -- Nullable(UInt64)
///   _time               -- Nullable(DateTime)
NamesAndTypesList getCommonVirtualsForFileLikeStorage()
{
    /// Each call builds a fresh type object, so no two columns share an instance.
    const auto low_cardinality_string = []
    {
        return std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>());
    };

    return {
        {"_path", low_cardinality_string()},
        {"_file", low_cardinality_string()},
        {"_size", makeNullable(std::make_shared<DataTypeUInt64>())},
        {"_time", makeNullable(std::make_shared<DataTypeDateTime>())},
        {"_etag", low_cardinality_string()},
    };
}
|
||||
|
||||
/// Returns the names of the common virtual columns of file-like storages.
///
/// The set is derived from getCommonVirtualsForFileLikeStorage() rather than from a
/// separately maintained literal, so the names and the column definitions cannot drift apart.
/// (The previous hard-coded `return {"_path", "_file", "_size", "_time", "_etag"};` made the
/// derived return unreachable and duplicated the list.)
NameSet getVirtualNamesForFileLikeStorage()
{
    return getCommonVirtualsForFileLikeStorage().getNameSet();
}
|
||||
|
||||
std::unordered_map<std::string, std::string> parseHivePartitioningKeysAndValues(const String & path)
|
||||
@ -155,8 +165,10 @@ VirtualColumnsDescription getVirtualsForFileLikeStorage(ColumnsDescription & sto
|
||||
{
|
||||
VirtualColumnsDescription desc;
|
||||
|
||||
auto add_virtual = [&](const auto & name, const auto & type)
|
||||
auto add_virtual = [&](const NameAndTypePair & pair)
|
||||
{
|
||||
const auto & name = pair.getNameInStorage();
|
||||
const auto & type = pair.getTypeInStorage();
|
||||
if (storage_columns.has(name))
|
||||
{
|
||||
if (!context->getSettingsRef().use_hive_partitioning)
|
||||
@ -173,11 +185,8 @@ VirtualColumnsDescription getVirtualsForFileLikeStorage(ColumnsDescription & sto
|
||||
desc.addEphemeral(name, type, "");
|
||||
};
|
||||
|
||||
add_virtual("_path", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>()));
|
||||
add_virtual("_file", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>()));
|
||||
add_virtual("_size", makeNullable(std::make_shared<DataTypeUInt64>()));
|
||||
add_virtual("_time", makeNullable(std::make_shared<DataTypeDateTime>()));
|
||||
add_virtual("_etag", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>()));
|
||||
for (const auto & item : getCommonVirtualsForFileLikeStorage())
|
||||
add_virtual(item);
|
||||
|
||||
if (context->getSettingsRef().use_hive_partitioning)
|
||||
{
|
||||
@ -189,9 +198,9 @@ VirtualColumnsDescription getVirtualsForFileLikeStorage(ColumnsDescription & sto
|
||||
if (type == nullptr)
|
||||
type = std::make_shared<DataTypeString>();
|
||||
if (type->canBeInsideLowCardinality())
|
||||
add_virtual(item.first, std::make_shared<DataTypeLowCardinality>(type));
|
||||
add_virtual({item.first, std::make_shared<DataTypeLowCardinality>(type)});
|
||||
else
|
||||
add_virtual(item.first, type);
|
||||
add_virtual({item.first, type});
|
||||
}
|
||||
}
|
||||
|
||||
@ -228,18 +237,18 @@ static void addFilterDataToVirtualColumns(Block & block, const String & path, si
|
||||
block.getByName("_idx").column->assumeMutableRef().insert(idx);
|
||||
}
|
||||
|
||||
std::optional<ActionsDAG> createPathAndFileFilterDAG(const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns, const String & path, const ContextPtr & context)
|
||||
std::optional<ActionsDAG> createPathAndFileFilterDAG(const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns, const ContextPtr & context)
|
||||
{
|
||||
if (!predicate || virtual_columns.empty())
|
||||
return {};
|
||||
|
||||
Block block;
|
||||
std::unordered_map<std::string, std::string> keys;
|
||||
NameSet common_virtuals;
|
||||
if (context->getSettingsRef().use_hive_partitioning)
|
||||
keys = parseHivePartitioningKeysAndValues(path);
|
||||
common_virtuals = getVirtualNamesForFileLikeStorage();
|
||||
for (const auto & column : virtual_columns)
|
||||
{
|
||||
if (column.name == "_file" || column.name == "_path" || keys.contains(column.name))
|
||||
if (column.name == "_file" || column.name == "_path" || !common_virtuals.contains(column.name))
|
||||
block.insert({column.type->createColumn(), column.type, column.name});
|
||||
}
|
||||
|
||||
@ -262,10 +271,10 @@ ColumnPtr getFilterByPathAndFileIndexes(const std::vector<String> & paths, const
|
||||
auto it = keys.find(column.name);
|
||||
if (it != keys.end())
|
||||
{
|
||||
auto c = std::make_shared<DataTypeString>()->createColumn();
|
||||
c->insert(it->second);
|
||||
auto string_column = std::make_shared<DataTypeString>()->createColumn();
|
||||
string_column->insert(it->second);
|
||||
block.insert({column.type->createColumn(), column.type, column.name});
|
||||
partitioning_columns.push_back({c->getPtr(), column.type, column.name});
|
||||
partitioning_columns.push_back({string_column->getPtr(), column.type, column.name});
|
||||
}
|
||||
}
|
||||
block.insert({ColumnUInt64::create(), std::make_shared<DataTypeUInt64>(), "_idx"});
|
||||
|
@ -75,7 +75,7 @@ VirtualColumnsDescription getVirtualsForFileLikeStorage(
|
||||
const std::string & sample_path = "",
|
||||
std::optional<FormatSettings> format_settings_ = std::nullopt);
|
||||
|
||||
std::optional<ActionsDAG> createPathAndFileFilterDAG(const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns, const String & path, const ContextPtr & context);
|
||||
std::optional<ActionsDAG> createPathAndFileFilterDAG(const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns, const ContextPtr & context);
|
||||
|
||||
ColumnPtr getFilterByPathAndFileIndexes(const std::vector<String> & paths, const ExpressionActionsPtr & actions, const NamesAndTypesList & virtual_columns, const ContextPtr & context);
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user