Give priority to columns parsed from the file path (hive partitioning) over storage columns

This commit is contained in:
Yarik Briukhovetskyi 2024-08-21 16:24:43 +02:00 committed by GitHub
parent 24eeaffa7a
commit 1afd3a7c3a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
14 changed files with 33 additions and 30 deletions

View File

@ -444,8 +444,8 @@ StorageHive::StorageHive(
storage_metadata.setComment(comment_);
storage_metadata.partition_key = KeyDescription::getKeyFromAST(partition_by_ast, storage_metadata.columns, getContext());
setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.columns, getContext()));
setInMemoryMetadata(storage_metadata);
setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns(), getContext()));
}
void StorageHive::lazyInitialize()

View File

@ -94,7 +94,7 @@ StorageObjectStorage::StorageObjectStorage(
if (sample_path.empty() && context->getSettingsRef().use_hive_partitioning)
sample_path = getPathSample(metadata, context);
setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(metadata.getColumns(), context, sample_path, format_settings));
setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(metadata.columns, context, sample_path, format_settings));
setInMemoryMetadata(metadata);
}

View File

@ -68,7 +68,7 @@ StorageObjectStorageCluster::StorageObjectStorageCluster(
if (sample_path.empty() && context_->getSettingsRef().use_hive_partitioning)
sample_path = getPathSample(metadata, context_);
setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(metadata.getColumns(), context_, sample_path));
setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(metadata.columns, context_, sample_path));
setInMemoryMetadata(metadata);
}

View File

@ -208,7 +208,7 @@ Chunk StorageObjectStorageSource::generate()
.filename = &filename,
.last_modified = object_info->metadata->last_modified,
.etag = &(object_info->metadata->etag)
}, getContext(), read_from_format_info.columns_description);
}, getContext());
const auto & partition_columns = configuration->getPartitionColumns();
if (!partition_columns.empty() && chunk_size && chunk.hasColumns())
@ -280,7 +280,7 @@ StorageObjectStorageSource::ReaderHolder StorageObjectStorageSource::createReade
const std::shared_ptr<IIterator> & file_iterator,
const ConfigurationPtr & configuration,
const ObjectStoragePtr & object_storage,
const ReadFromFormatInfo & read_from_format_info,
ReadFromFormatInfo & read_from_format_info,
const std::optional<FormatSettings> & format_settings,
const std::shared_ptr<const KeyCondition> & key_condition_,
const ContextPtr & context_,

View File

@ -74,7 +74,7 @@ protected:
const UInt64 max_block_size;
const bool need_only_count;
const size_t max_parsing_threads;
const ReadFromFormatInfo read_from_format_info;
ReadFromFormatInfo read_from_format_info;
const std::shared_ptr<ThreadPool> create_reader_pool;
std::shared_ptr<IIterator> file_iterator;
@ -122,7 +122,7 @@ protected:
const std::shared_ptr<IIterator> & file_iterator,
const ConfigurationPtr & configuration,
const ObjectStoragePtr & object_storage,
const ReadFromFormatInfo & read_from_format_info,
ReadFromFormatInfo & read_from_format_info,
const std::optional<FormatSettings> & format_settings,
const std::shared_ptr<const KeyCondition> & key_condition_,
const ContextPtr & context_,

View File

@ -524,7 +524,7 @@ Chunk ObjectStorageQueueSource::generateImpl()
{
.path = path,
.size = reader.getObjectInfo()->metadata->size_bytes
}, getContext(), read_from_format_info.columns_description);
}, getContext());
return chunk;
}

View File

@ -128,7 +128,7 @@ private:
const std::shared_ptr<FileIterator> file_iterator;
const ConfigurationPtr configuration;
const ObjectStoragePtr object_storage;
const ReadFromFormatInfo read_from_format_info;
ReadFromFormatInfo read_from_format_info;
const std::optional<FormatSettings> format_settings;
const ObjectStorageQueueSettings queue_settings;
const std::shared_ptr<ObjectStorageQueueMetadata> files_metadata;

View File

@ -169,7 +169,7 @@ StorageObjectStorageQueue::StorageObjectStorageQueue(
storage_metadata.setColumns(columns);
storage_metadata.setConstraints(constraints_);
storage_metadata.setComment(comment);
setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns(), context_));
setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.columns, context_));
setInMemoryMetadata(storage_metadata);
LOG_INFO(log, "Using zookeeper path: {}", zk_path.string());

View File

@ -1112,9 +1112,9 @@ void StorageFile::setStorageMetadata(CommonArguments args)
storage_metadata.setConstraints(args.constraints);
storage_metadata.setComment(args.comment);
setInMemoryMetadata(storage_metadata);
setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns(), args.getContext(), paths.empty() ? "" : paths[0], format_settings));
setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.columns, args.getContext(), paths.empty() ? "" : paths[0], format_settings));
setInMemoryMetadata(storage_metadata);
}
@ -1468,7 +1468,7 @@ Chunk StorageFileSource::generate()
.size = current_file_size,
.filename = (filename_override.has_value() ? &filename_override.value() : nullptr),
.last_modified = current_file_last_modified
}, getContext(), columns_description);
}, getContext());
return chunk;
}

View File

@ -60,8 +60,8 @@ StorageFileCluster::StorageFileCluster(
}
storage_metadata.setConstraints(constraints_);
setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.columns, context, paths.empty() ? "" : paths[0]));
setInMemoryMetadata(storage_metadata);
setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns(), context, paths.empty() ? "" : paths[0]));
}
void StorageFileCluster::updateQueryToSendIfNeeded(DB::ASTPtr & query, const StorageSnapshotPtr & storage_snapshot, const DB::ContextPtr & context)

View File

@ -165,9 +165,9 @@ IStorageURLBase::IStorageURLBase(
storage_metadata.setConstraints(constraints_);
storage_metadata.setComment(comment);
setInMemoryMetadata(storage_metadata);
setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns(), context_, getSampleURI(uri, context_), format_settings));
setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.columns, context_, getSampleURI(uri, context_), format_settings));
setInMemoryMetadata(storage_metadata);
}
@ -435,7 +435,7 @@ Chunk StorageURLSource::generate()
{
.path = curr_uri.getPath(),
.size = current_file_size,
}, getContext(), columns_description);
}, getContext());
return chunk;
}

View File

@ -75,8 +75,8 @@ StorageURLCluster::StorageURLCluster(
}
storage_metadata.setConstraints(constraints_);
setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.columns, context, getSampleURI(uri, context)));
setInMemoryMetadata(storage_metadata);
setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns(), context, getSampleURI(uri, context)));
}
void StorageURLCluster::updateQueryToSendIfNeeded(ASTPtr & query, const StorageSnapshotPtr & storage_snapshot, const ContextPtr & context)

View File

@ -129,7 +129,7 @@ NameSet getVirtualNamesForFileLikeStorage()
return {"_path", "_file", "_size", "_time", "_etag"};
}
std::unordered_map<std::string, std::string> parseHivePartitioningKeysAndValues(const String & path, const ColumnsDescription & storage_columns)
std::unordered_map<std::string, std::string> parseHivePartitioningKeysAndValues(const String & path)
{
std::string pattern = "([^/]+)=([^/]+)/";
re2::StringPiece input_piece(path);
@ -145,34 +145,37 @@ std::unordered_map<std::string, std::string> parseHivePartitioningKeysAndValues(
used_keys.insert({key, value});
auto col_name = key;
while (storage_columns.has(col_name))
col_name = "_" + col_name;
key_values[col_name] = value;
}
return key_values;
}
VirtualColumnsDescription getVirtualsForFileLikeStorage(const ColumnsDescription & storage_columns, const ContextPtr & context, const std::string & path, std::optional<FormatSettings> format_settings_)
VirtualColumnsDescription getVirtualsForFileLikeStorage(ColumnsDescription & storage_columns, const ContextPtr & context, const std::string & path, std::optional<FormatSettings> format_settings_)
{
VirtualColumnsDescription desc;
auto add_virtual = [&](const auto & name, const auto & type)
{
if (storage_columns.has(name))
auto local_type = type;
if (storage_columns.has(name) && !context->getSettingsRef().use_hive_partitioning)
return;
if (storage_columns.has(name))
{
local_type = storage_columns.get(name).type;
storage_columns.remove(name);
}
desc.addEphemeral(name, type, "");
desc.addEphemeral(name, local_type, "");
};
add_virtual("_path", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>()));
add_virtual("_file", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>()));
add_virtual("_size", makeNullable(std::make_shared<DataTypeUInt64>()));
add_virtual("_time", makeNullable(std::make_shared<DataTypeDateTime>()));
add_virtual("_etag", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>()));
if (context->getSettingsRef().use_hive_partitioning)
{
auto map = parseHivePartitioningKeysAndValues(path, storage_columns);
auto map = parseHivePartitioningKeysAndValues(path);
auto format_settings = format_settings_ ? *format_settings_ : getFormatSettings(context);
for (auto & item : map)
{
@ -245,11 +248,11 @@ ColumnPtr getFilterByPathAndFileIndexes(const std::vector<String> & paths, const
void addRequestedFileLikeStorageVirtualsToChunk(
Chunk & chunk, const NamesAndTypesList & requested_virtual_columns,
VirtualsForFileLikeStorage virtual_values, ContextPtr context, const ColumnsDescription & columns)
VirtualsForFileLikeStorage virtual_values, ContextPtr context)
{
std::unordered_map<std::string, std::string> hive_map;
if (context->getSettingsRef().use_hive_partitioning)
hive_map = parseHivePartitioningKeysAndValues(virtual_values.path, columns);
hive_map = parseHivePartitioningKeysAndValues(virtual_values.path);
for (const auto & virtual_column : requested_virtual_columns)
{

View File

@ -70,7 +70,7 @@ auto extractSingleValueFromBlock(const Block & block, const String & name)
NameSet getVirtualNamesForFileLikeStorage();
VirtualColumnsDescription getVirtualsForFileLikeStorage(
const ColumnsDescription & storage_columns,
ColumnsDescription & storage_columns,
const ContextPtr & context,
const std::string & sample_path = "",
std::optional<FormatSettings> format_settings_ = std::nullopt);
@ -105,7 +105,7 @@ struct VirtualsForFileLikeStorage
void addRequestedFileLikeStorageVirtualsToChunk(
Chunk & chunk, const NamesAndTypesList & requested_virtual_columns,
VirtualsForFileLikeStorage virtual_values, ContextPtr context, const ColumnsDescription & columns);
VirtualsForFileLikeStorage virtual_values, ContextPtr context);
}
}