mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-12-13 09:52:38 +00:00
358 lines
13 KiB
C++
358 lines
13 KiB
C++
#include "DirectDictionary.h"
|
|
|
|
#include <Core/Defines.h>
|
|
#include <Common/HashTable/HashMap.h>
|
|
#include <DataTypes/DataTypesDecimal.h>
|
|
#include <Functions/FunctionHelpers.h>
|
|
|
|
#include <Dictionaries/DictionaryFactory.h>
|
|
#include <Dictionaries/HierarchyDictionariesUtils.h>
|
|
|
|
#include <Processors/QueryPipeline.h>
|
|
#include <Processors/Executors/PullingPipelineExecutor.h>
|
|
|
|
namespace DB
|
|
{
|
|
namespace ErrorCodes
|
|
{
|
|
extern const int UNSUPPORTED_METHOD;
|
|
extern const int BAD_ARGUMENTS;
|
|
}
|
|
|
|
template <DictionaryKeyType dictionary_key_type>
|
|
DirectDictionary<dictionary_key_type>::DirectDictionary(
|
|
const StorageID & dict_id_,
|
|
const DictionaryStructure & dict_struct_,
|
|
DictionarySourcePtr source_ptr_)
|
|
: IDictionary(dict_id_)
|
|
, dict_struct(dict_struct_)
|
|
, source_ptr{std::move(source_ptr_)}
|
|
{
|
|
if (!source_ptr->supportsSelectiveLoad())
|
|
throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "{}: source cannot be used with DirectDictionary", full_name);
|
|
}
|
|
|
|
template <DictionaryKeyType dictionary_key_type>
|
|
Columns DirectDictionary<dictionary_key_type>::getColumns(
|
|
const Strings & attribute_names,
|
|
const DataTypes & result_types,
|
|
const Columns & key_columns,
|
|
const DataTypes & key_types [[maybe_unused]],
|
|
const Columns & default_values_columns) const
|
|
{
|
|
if constexpr (dictionary_key_type == DictionaryKeyType::Complex)
|
|
dict_struct.validateKeyTypes(key_types);
|
|
|
|
DictionaryKeysArenaHolder<dictionary_key_type> arena_holder;
|
|
DictionaryKeysExtractor<dictionary_key_type> extractor(key_columns, arena_holder.getComplexKeyArena());
|
|
const auto requested_keys = extractor.extractAllKeys();
|
|
|
|
DictionaryStorageFetchRequest request(dict_struct, attribute_names, result_types, default_values_columns);
|
|
|
|
HashMap<KeyType, size_t> key_to_fetched_index;
|
|
key_to_fetched_index.reserve(requested_keys.size());
|
|
|
|
auto fetched_columns_from_storage = request.makeAttributesResultColumns();
|
|
for (size_t attribute_index = 0; attribute_index < request.attributesSize(); ++attribute_index)
|
|
{
|
|
if (!request.shouldFillResultColumnWithIndex(attribute_index))
|
|
continue;
|
|
|
|
auto & fetched_column_from_storage = fetched_columns_from_storage[attribute_index];
|
|
fetched_column_from_storage->reserve(requested_keys.size());
|
|
}
|
|
|
|
size_t fetched_key_index = 0;
|
|
|
|
Columns block_key_columns;
|
|
size_t dictionary_keys_size = dict_struct.getKeysNames().size();
|
|
block_key_columns.reserve(dictionary_keys_size);
|
|
|
|
QueryPipeline pipeline;
|
|
pipeline.init(getSourceBlockInputStream(key_columns, requested_keys));
|
|
|
|
PullingPipelineExecutor executor(pipeline);
|
|
|
|
Block block;
|
|
while (executor.pull(block))
|
|
{
|
|
/// Split into keys columns and attribute columns
|
|
for (size_t i = 0; i < dictionary_keys_size; ++i)
|
|
block_key_columns.emplace_back(block.safeGetByPosition(i).column);
|
|
|
|
DictionaryKeysExtractor<dictionary_key_type> block_keys_extractor(block_key_columns, arena_holder.getComplexKeyArena());
|
|
auto block_keys = block_keys_extractor.extractAllKeys();
|
|
|
|
for (size_t attribute_index = 0; attribute_index < request.attributesSize(); ++attribute_index)
|
|
{
|
|
if (!request.shouldFillResultColumnWithIndex(attribute_index))
|
|
continue;
|
|
|
|
const auto & block_column = block.safeGetByPosition(dictionary_keys_size + attribute_index).column;
|
|
fetched_columns_from_storage[attribute_index]->insertRangeFrom(*block_column, 0, block_keys.size());
|
|
}
|
|
|
|
for (size_t block_key_index = 0; block_key_index < block_keys.size(); ++block_key_index)
|
|
{
|
|
auto block_key = block_keys[block_key_index];
|
|
key_to_fetched_index[block_key] = fetched_key_index;
|
|
++fetched_key_index;
|
|
}
|
|
|
|
block_key_columns.clear();
|
|
}
|
|
|
|
Field value_to_insert;
|
|
|
|
size_t requested_keys_size = requested_keys.size();
|
|
|
|
auto result_columns = request.makeAttributesResultColumns();
|
|
|
|
size_t keys_found = 0;
|
|
|
|
for (size_t attribute_index = 0; attribute_index < result_columns.size(); ++attribute_index)
|
|
{
|
|
if (!request.shouldFillResultColumnWithIndex(attribute_index))
|
|
continue;
|
|
|
|
auto & result_column = result_columns[attribute_index];
|
|
|
|
const auto & fetched_column_from_storage = fetched_columns_from_storage[attribute_index];
|
|
const auto & default_value_provider = request.defaultValueProviderAtIndex(attribute_index);
|
|
|
|
result_column->reserve(requested_keys_size);
|
|
|
|
for (size_t requested_key_index = 0; requested_key_index < requested_keys_size; ++requested_key_index)
|
|
{
|
|
const auto requested_key = requested_keys[requested_key_index];
|
|
const auto * it = key_to_fetched_index.find(requested_key);
|
|
|
|
if (it)
|
|
{
|
|
fetched_column_from_storage->get(it->getMapped(), value_to_insert);
|
|
++keys_found;
|
|
}
|
|
else
|
|
value_to_insert = default_value_provider.getDefaultValue(requested_key_index);
|
|
|
|
result_column->insert(value_to_insert);
|
|
}
|
|
}
|
|
|
|
query_count.fetch_add(requested_keys_size, std::memory_order_relaxed);
|
|
found_count.fetch_add(keys_found, std::memory_order_relaxed);
|
|
|
|
return request.filterRequestedColumns(result_columns);
|
|
}
|
|
|
|
template <DictionaryKeyType dictionary_key_type>
|
|
ColumnPtr DirectDictionary<dictionary_key_type>::getColumn(
|
|
const std::string & attribute_name,
|
|
const DataTypePtr & result_type,
|
|
const Columns & key_columns,
|
|
const DataTypes & key_types,
|
|
const ColumnPtr & default_values_column) const
|
|
{
|
|
return getColumns({ attribute_name }, { result_type }, key_columns, key_types, { default_values_column }).front();
|
|
}
|
|
|
|
template <DictionaryKeyType dictionary_key_type>
|
|
ColumnUInt8::Ptr DirectDictionary<dictionary_key_type>::hasKeys(
|
|
const Columns & key_columns,
|
|
const DataTypes & key_types [[maybe_unused]]) const
|
|
{
|
|
if constexpr (dictionary_key_type == DictionaryKeyType::Complex)
|
|
dict_struct.validateKeyTypes(key_types);
|
|
|
|
DictionaryKeysArenaHolder<dictionary_key_type> arena_holder;
|
|
DictionaryKeysExtractor<dictionary_key_type> requested_keys_extractor(key_columns, arena_holder.getComplexKeyArena());
|
|
auto requested_keys = requested_keys_extractor.extractAllKeys();
|
|
size_t requested_keys_size = requested_keys.size();
|
|
|
|
HashMap<KeyType, size_t> requested_key_to_index;
|
|
requested_key_to_index.reserve(requested_keys_size);
|
|
|
|
for (size_t i = 0; i < requested_keys.size(); ++i)
|
|
{
|
|
auto requested_key = requested_keys[i];
|
|
requested_key_to_index[requested_key] = i;
|
|
}
|
|
|
|
auto result = ColumnUInt8::create(requested_keys_size, false);
|
|
auto & result_data = result->getData();
|
|
|
|
Columns block_key_columns;
|
|
size_t dictionary_keys_size = dict_struct.getKeysNames().size();
|
|
block_key_columns.reserve(dictionary_keys_size);
|
|
|
|
QueryPipeline pipeline;
|
|
pipeline.init(getSourceBlockInputStream(key_columns, requested_keys));
|
|
|
|
PullingPipelineExecutor executor(pipeline);
|
|
|
|
size_t keys_found = 0;
|
|
Block block;
|
|
while (executor.pull(block))
|
|
{
|
|
/// Split into keys columns and attribute columns
|
|
for (size_t i = 0; i < dictionary_keys_size; ++i)
|
|
block_key_columns.emplace_back(block.safeGetByPosition(i).column);
|
|
|
|
DictionaryKeysExtractor<dictionary_key_type> block_keys_extractor(block_key_columns, arena_holder.getComplexKeyArena());
|
|
size_t block_keys_size = block_keys_extractor.getKeysSize();
|
|
|
|
for (size_t i = 0; i < block_keys_size; ++i)
|
|
{
|
|
auto block_key = block_keys_extractor.extractCurrentKey();
|
|
|
|
const auto * it = requested_key_to_index.find(block_key);
|
|
assert(it);
|
|
|
|
size_t result_data_found_index = it->getMapped();
|
|
/// block_keys_size cannot be used, due to duplicates.
|
|
keys_found += !result_data[result_data_found_index];
|
|
result_data[result_data_found_index] = true;
|
|
|
|
block_keys_extractor.rollbackCurrentKey();
|
|
}
|
|
|
|
block_key_columns.clear();
|
|
}
|
|
|
|
query_count.fetch_add(requested_keys_size, std::memory_order_relaxed);
|
|
found_count.fetch_add(keys_found, std::memory_order_relaxed);
|
|
|
|
return result;
|
|
}
|
|
|
|
template <DictionaryKeyType dictionary_key_type>
|
|
ColumnPtr DirectDictionary<dictionary_key_type>::getHierarchy(
|
|
ColumnPtr key_column,
|
|
const DataTypePtr & key_type) const
|
|
{
|
|
if (dictionary_key_type == DictionaryKeyType::Simple)
|
|
{
|
|
size_t keys_found;
|
|
auto result = getKeysHierarchyDefaultImplementation(this, key_column, key_type, keys_found);
|
|
query_count.fetch_add(key_column->size(), std::memory_order_relaxed);
|
|
found_count.fetch_add(keys_found, std::memory_order_relaxed);
|
|
return result;
|
|
}
|
|
else
|
|
return nullptr;
|
|
}
|
|
|
|
template <DictionaryKeyType dictionary_key_type>
|
|
ColumnUInt8::Ptr DirectDictionary<dictionary_key_type>::isInHierarchy(
|
|
ColumnPtr key_column,
|
|
ColumnPtr in_key_column,
|
|
const DataTypePtr & key_type) const
|
|
{
|
|
if (dictionary_key_type == DictionaryKeyType::Simple)
|
|
{
|
|
size_t keys_found = 0;
|
|
auto result = getKeysIsInHierarchyDefaultImplementation(this, key_column, in_key_column, key_type, keys_found);
|
|
query_count.fetch_add(key_column->size(), std::memory_order_relaxed);
|
|
found_count.fetch_add(keys_found, std::memory_order_relaxed);
|
|
return result;
|
|
}
|
|
else
|
|
return nullptr;
|
|
}
|
|
|
|
template <DictionaryKeyType dictionary_key_type>
|
|
Pipe DirectDictionary<dictionary_key_type>::getSourceBlockInputStream(
|
|
const Columns & key_columns [[maybe_unused]],
|
|
const PaddedPODArray<KeyType> & requested_keys [[maybe_unused]]) const
|
|
{
|
|
size_t requested_keys_size = requested_keys.size();
|
|
|
|
Pipe pipe;
|
|
|
|
if constexpr (dictionary_key_type == DictionaryKeyType::Simple)
|
|
{
|
|
std::vector<UInt64> ids;
|
|
ids.reserve(requested_keys_size);
|
|
|
|
for (auto key : requested_keys)
|
|
ids.emplace_back(key);
|
|
|
|
pipe = source_ptr->loadIds(ids);
|
|
}
|
|
else
|
|
{
|
|
std::vector<size_t> requested_rows;
|
|
requested_rows.reserve(requested_keys_size);
|
|
for (size_t i = 0; i < requested_keys_size; ++i)
|
|
requested_rows.emplace_back(i);
|
|
|
|
pipe = source_ptr->loadKeys(key_columns, requested_rows);
|
|
}
|
|
|
|
return pipe;
|
|
}
|
|
|
|
template <DictionaryKeyType dictionary_key_type>
|
|
Pipe DirectDictionary<dictionary_key_type>::read(const Names & /* column_names */, size_t /* max_block_size */) const
|
|
{
|
|
return source_ptr->loadAll();
|
|
}
|
|
|
|
namespace
|
|
{
|
|
template <DictionaryKeyType dictionary_key_type>
|
|
DictionaryPtr createDirectDictionary(
|
|
const std::string & full_name,
|
|
const DictionaryStructure & dict_struct,
|
|
const Poco::Util::AbstractConfiguration & config,
|
|
const std::string & config_prefix,
|
|
DictionarySourcePtr source_ptr,
|
|
ContextPtr /* global_context */,
|
|
bool /* created_from_ddl */)
|
|
{
|
|
const auto * layout_name = dictionary_key_type == DictionaryKeyType::Simple ? "direct" : "complex_key_direct";
|
|
|
|
if constexpr (dictionary_key_type == DictionaryKeyType::Simple)
|
|
{
|
|
if (dict_struct.key)
|
|
throw Exception(ErrorCodes::UNSUPPORTED_METHOD,
|
|
"'key' is not supported for dictionary of layout '{}'",
|
|
layout_name);
|
|
}
|
|
else
|
|
{
|
|
if (dict_struct.id)
|
|
throw Exception(ErrorCodes::UNSUPPORTED_METHOD,
|
|
"'id' is not supported for dictionary of layout '{}'",
|
|
layout_name);
|
|
}
|
|
|
|
if (dict_struct.range_min || dict_struct.range_max)
|
|
throw Exception(ErrorCodes::BAD_ARGUMENTS,
|
|
"{}: elements .structure.range_min and .structure.range_max should be defined only "
|
|
"for a dictionary of layout 'range_hashed'",
|
|
full_name);
|
|
|
|
const auto dict_id = StorageID::fromDictionaryConfig(config, config_prefix);
|
|
|
|
if (config.has(config_prefix + ".lifetime.min") || config.has(config_prefix + ".lifetime.max"))
|
|
throw Exception(ErrorCodes::BAD_ARGUMENTS,
|
|
"'lifetime' parameter is redundant for the dictionary' of layout '{}'",
|
|
layout_name);
|
|
|
|
return std::make_unique<DirectDictionary<dictionary_key_type>>(dict_id, dict_struct, std::move(source_ptr));
|
|
}
|
|
}
|
|
|
|
template class DirectDictionary<DictionaryKeyType::Simple>;
|
|
template class DirectDictionary<DictionaryKeyType::Complex>;
|
|
|
|
void registerDictionaryDirect(DictionaryFactory & factory)
|
|
{
|
|
factory.registerLayout("direct", createDirectDictionary<DictionaryKeyType::Simple>, false);
|
|
factory.registerLayout("complex_key_direct", createDirectDictionary<DictionaryKeyType::Complex>, true);
|
|
}
|
|
|
|
|
|
}
|