move the block inserting

This commit is contained in:
Yarik Briukhovetskyi 2024-09-13 19:58:19 +02:00 committed by GitHub
parent 991279e5c6
commit ad31d86a15
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -207,7 +207,7 @@ VirtualColumnsDescription getVirtualsForFileLikeStorage(ColumnsDescription & sto
return desc;
}
static void addFilterDataToVirtualColumns(Block & block, const String & path, size_t idx, const NamesAndTypesList & virtual_columns, const ContextPtr & context)
static void addFilterDataToVirtualColumns(Block & block, const String & path, size_t idx, ColumnWithTypeAndName partitioning_column, const ContextPtr & context)
{
if (block.has("_path"))
block.getByName("_path").column->assumeMutableRef().insert(path);
@ -224,21 +224,11 @@ static void addFilterDataToVirtualColumns(Block & block, const String & path, si
block.getByName("_file").column->assumeMutableRef().insert(file);
}
std::unordered_map<std::string, std::string> keys;
if (context->getSettingsRef().use_hive_partitioning)
keys = parseHivePartitioningKeysAndValues(path);
for (const auto & virt_column : virtual_columns)
if (block.has(partitioning_column.name))
{
auto it = keys.find(virt_column.name);
if (it != keys.end())
{
if (!block.has(virt_column.name))
block.insert({virt_column.type->createColumn(), virt_column.type, virt_column.name});
auto & column = block.getByName(virt_column.name).column;
ReadBufferFromString buf(it->second);
virt_column.type->getDefaultSerialization()->deserializeWholeText(column->assumeMutableRef(), buf, getFormatSettings(context));
}
auto & column = block.getByName(partitioning_column.name).column;
ReadBufferFromString buf(partitioning_column.column->getDataAt(0).toView());
partitioning_column.type->getDefaultSerialization()->deserializeWholeText(column->assumeMutableRef(), buf, getFormatSettings(context));
}
block.getByName("_idx").column->assumeMutableRef().insert(idx);
@ -266,15 +256,37 @@ std::optional<ActionsDAG> createPathAndFileFilterDAG(const ActionsDAG::Node * pr
ColumnPtr getFilterByPathAndFileIndexes(const std::vector<String> & paths, const ExpressionActionsPtr & actions, const NamesAndTypesList & virtual_columns, const ContextPtr & context)
{
Block block;
std::vector<std::unordered_map<std::string, std::string>> keys_vec;
ColumnsWithTypeAndName partitioning_columns;
if (context->getSettingsRef().use_hive_partitioning)
{
for (const auto & path : paths)
keys_vec.push_back(parseHivePartitioningKeysAndValues(path));
}
for (const auto & column : virtual_columns)
{
if (column.name == "_file" || column.name == "_path")
block.insert({column.type->createColumn(), column.type, column.name});
else
{
for (auto & keys : keys_vec)
{
const auto & it = keys.find(column.name);
if (it != keys.end())
{
auto string_column = std::make_shared<DataTypeString>()->createColumn();
string_column->insert(it->second);
block.insert({column.type->createColumn(), column.type, column.name});
partitioning_columns.push_back({string_column->getPtr(), column.type, column.name});
keys.erase(it);
}
}
}
}
block.insert({ColumnUInt64::create(), std::make_shared<DataTypeUInt64>(), "_idx"});
for (size_t i = 0; i != paths.size(); ++i)
addFilterDataToVirtualColumns(block, paths[i], i, virtual_columns, context);
addFilterDataToVirtualColumns(block, paths[i], i, partitioning_columns[i], context);
filterBlockWithExpression(actions, block);