Merge branch 'layout_direct' of github.com:kekekekule/ClickHouse into dict_direct

This commit is contained in:
Nikolay Degterinsky 2020-05-03 15:12:39 +03:00
commit 2b46159a6f
7 changed files with 1029 additions and 1 deletions

View File

@ -0,0 +1,603 @@
#include "DirectDictionary.h"
#include <IO/WriteHelpers.h>
#include "DictionaryBlockInputStream.h"
#include "DictionaryFactory.h"
#include <Core/Defines.h>
namespace DB
{
/// Error codes used by this translation unit. They are only declared here (extern);
/// the definitions are not part of this file.
///  - TYPE_MISMATCH: thrown by createAttributes() when the hierarchical attribute is not UInt64.
///  - BAD_ARGUMENTS: thrown by getAttribute() for an unknown attribute name and by the layout
///    factory when range_min / range_max are present in the structure.
///  - UNSUPPORTED_METHOD: thrown by the layout factory when the structure defines a complex 'key'.
namespace ErrorCodes
{
extern const int TYPE_MISMATCH;
extern const int BAD_ARGUMENTS;
extern const int UNSUPPORTED_METHOD;
}
/// Constructs a dictionary that keeps no data of its own: every lookup re-reads the source.
///
/// @param database_      Database the dictionary belongs to; may be empty.
/// @param name_          Dictionary name.
/// @param dict_struct_   Description of the key and attributes.
/// @param source_ptr_    Source that is queried on each request (ownership is taken).
/// @param dict_lifetime_ Lifetime settings, stored as is.
/// @param saved_block_   Optional block that is stored and passed on by clone().
///
/// The full name is "<database>.<name>", or just "<name>" when the database is empty.
DirectDictionary::DirectDictionary(
    const std::string & database_,
    const std::string & name_,
    const DictionaryStructure & dict_struct_,
    DictionarySourcePtr source_ptr_,
    const DictionaryLifetime dict_lifetime_,
    BlockPtr saved_block_)
    : database(database_)
    , name(name_)
    , full_name{database_.empty() ? name_ : database_ + "." + name_}
    , dict_struct(dict_struct_)
    , source_ptr{std::move(source_ptr_)}
    , dict_lifetime(dict_lifetime_)
    , saved_block{std::move(saved_block_)}
{
    temp_arena = std::make_unique<Arena>();

    /// Attribute descriptors must exist before their memory footprint can be summed up.
    createAttributes();
    calculateBytesAllocated();
}
void DirectDictionary::toParent(const PaddedPODArray<Key> & ids, PaddedPODArray<Key> & out) const
{
const auto null_value = std::get<UInt64>(hierarchical_attribute->null_values);
getItemsImpl<UInt64, UInt64>(
*hierarchical_attribute,
ids,
[&](const size_t row, const UInt64 value) { out[row] = value; },
[&](const size_t) { return null_value; });
}
/// Uniform element access used by isInImpl(), which is instantiated both for whole columns of
/// ids and for a single constant id.
///
/// Array overload: returns the element of `arr` at position `idx`.
static inline DirectDictionary::Key getAt(const PaddedPODArray<DirectDictionary::Key> & arr, const size_t idx)
{
return arr[idx];
}
/// Constant overload: the row index is ignored and the same value is returned for every row.
static inline DirectDictionary::Key getAt(const DirectDictionary::Key & value, const size_t)
{
return value;
}
/// Looks up the parent of a single key by scanning the whole source.
///
/// @param to_find  Key whose hierarchical attribute value is requested.
/// @return The value of the hierarchical attribute in the first source row whose key equals
///         `to_find`, or the hierarchical attribute's null value when no such row exists.
///
/// Only the column of the hierarchical attribute is consulted. Reading "whatever attribute
/// comes first" would return a wrong parent (or access the Field with the wrong type) as soon
/// as the hierarchical attribute is not the first one declared.
///
/// Dereferences `hierarchical_attribute`, so the dictionary must have a hierarchical attribute.
DirectDictionary::Key DirectDictionary::getValueOrNullByKey(const Key & to_find) const
{
    /// Column 0 of a source block holds the key; attribute number i is read from column i + 1.
    const size_t parent_column_idx = static_cast<size_t>(hierarchical_attribute - attributes.data()) + 1;

    auto stream = source_ptr->loadAll();
    stream->readPrefix();

    while (const auto block = stream->read())
    {
        const IColumn & id_column = *block.safeGetByPosition(0).column;
        const IColumn & parent_column = *block.safeGetByPosition(parent_column_idx).column;

        for (const auto row_idx : ext::range(0, id_column.size()))
        {
            if (id_column[row_idx].get<UInt64>() == to_find)
                return parent_column[row_idx].get<Key>();
        }
    }

    stream->readSuffix();

    return std::get<Key>(hierarchical_attribute->null_values);
}
template <typename ChildType, typename AncestorType>
void DirectDictionary::isInImpl(const ChildType & child_ids, const AncestorType & ancestor_ids, PaddedPODArray<UInt8> & out) const
{
const auto null_value = std::get<UInt64>(hierarchical_attribute->null_values);
const auto rows = out.size();
for (const auto row : ext::range(0, rows))
{
auto id = getAt(child_ids, row);
const auto ancestor_id = getAt(ancestor_ids, row);
for (size_t i = 0; id != null_value && id != ancestor_id && i < DBMS_HIERARCHICAL_DICTIONARY_MAX_DEPTH; ++i)
id = getValueOrNullByKey(id);
out[row] = id != null_value && id == ancestor_id;
}
query_count.fetch_add(rows, std::memory_order_relaxed);
}
void DirectDictionary::isInVectorVector(
const PaddedPODArray<Key> & child_ids, const PaddedPODArray<Key> & ancestor_ids, PaddedPODArray<UInt8> & out) const
{
isInImpl(child_ids, ancestor_ids, out);
}
void DirectDictionary::isInVectorConstant(const PaddedPODArray<Key> & child_ids, const Key ancestor_id, PaddedPODArray<UInt8> & out) const
{
isInImpl(child_ids, ancestor_id, out);
}
void DirectDictionary::isInConstantVector(const Key child_id, const PaddedPODArray<Key> & ancestor_ids, PaddedPODArray<UInt8> & out) const
{
isInImpl(child_id, ancestor_ids, out);
}
#define DECLARE(TYPE) \
void DirectDictionary::get##TYPE(const std::string & attribute_name, const PaddedPODArray<Key> & ids, ResultArrayType<TYPE> & out) const \
{ \
const auto & attribute = getAttribute(attribute_name); \
checkAttributeType(full_name, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
\
const auto null_value = std::get<TYPE>(attribute.null_values); \
\
getItemsImpl<TYPE, TYPE>( \
attribute, ids, [&](const size_t row, const auto value) { out[row] = value; }, [&](const size_t) { return null_value; }); \
}
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
/// Reads a String attribute for every id and inserts the values into `out`.
///
/// @param attribute_name  Name of a String attribute; getAttribute() throws for an unknown name
///                        and checkAttributeType() validates the underlying type.
/// @param ids             Keys to look up.
/// @param out             Column that receives one value per id.
///
/// Ids that are missing from the source get the attribute's null value.
///
/// The setter ignores its row argument and inserts into `out`, so the order of values in `out`
/// follows the order in which getItemsStringImpl() invokes it.
void DirectDictionary::getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, ColumnString * out) const
{
    const auto & attribute = getAttribute(attribute_name);
    checkAttributeType(full_name, attribute_name, attribute.type, AttributeUnderlyingType::utString);

    const auto & null_value = std::get<StringRef>(attribute.null_values);

    getItemsStringImpl<StringRef, StringRef>(
        attribute,
        ids,
        /// Taken by const reference: a by-value parameter would copy the string for every row.
        [&](const size_t, const String & value) { out->insertData(value.data(), value.size()); },
        [&](const size_t) { return String(null_value.data, null_value.size); });
}
#define DECLARE(TYPE) \
void DirectDictionary::get##TYPE( \
const std::string & attribute_name, \
const PaddedPODArray<Key> & ids, \
const PaddedPODArray<TYPE> & def, \
ResultArrayType<TYPE> & out) const \
{ \
const auto & attribute = getAttribute(attribute_name); \
checkAttributeType(full_name, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
\
getItemsImpl<TYPE, TYPE>( \
attribute, ids, [&](const size_t row, const auto value) { out[row] = value; }, [&](const size_t row) { return def[row]; }); \
}
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
/// Reads a String attribute for every id, using a per-row default for missing ids.
///
/// @param attribute_name  Name of a String attribute; getAttribute() throws for an unknown name
///                        and checkAttributeType() validates the underlying type.
/// @param ids             Keys to look up.
/// @param def             Column of defaults; row `i` is used when `ids[i]` is not in the source.
/// @param out             Column that receives one value per id.
///
/// The setter ignores its row argument and inserts into `out`, so the order of values in `out`
/// follows the order in which getItemsStringImpl() invokes it.
void DirectDictionary::getString(
    const std::string & attribute_name, const PaddedPODArray<Key> & ids, const ColumnString * const def, ColumnString * const out) const
{
    const auto & attribute = getAttribute(attribute_name);
    checkAttributeType(full_name, attribute_name, attribute.type, AttributeUnderlyingType::utString);

    getItemsStringImpl<StringRef, StringRef>(
        attribute,
        ids,
        /// Taken by const reference: a by-value parameter would copy the string for every row.
        [&](const size_t, const String & value) { out->insertData(value.data(), value.size()); },
        [&](const size_t row)
        {
            const auto ref = def->getDataAt(row);
            return String(ref.data, ref.size);
        });
}
#define DECLARE(TYPE) \
void DirectDictionary::get##TYPE( \
const std::string & attribute_name, const PaddedPODArray<Key> & ids, const TYPE def, ResultArrayType<TYPE> & out) const \
{ \
const auto & attribute = getAttribute(attribute_name); \
checkAttributeType(full_name, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
\
getItemsImpl<TYPE, TYPE>( \
attribute, ids, [&](const size_t row, const auto value) { out[row] = value; }, [&](const size_t) { return def; }); \
}
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
void DirectDictionary::getString(
const std::string & attribute_name, const PaddedPODArray<Key> & ids, const String & def, ColumnString * const out) const
{
const auto & attribute = getAttribute(attribute_name);
checkAttributeType(full_name, attribute_name, attribute.type, AttributeUnderlyingType::utString);
DirectDictionary::getItemsStringImpl<StringRef, StringRef>(
attribute,
ids,
[&](const size_t, const String value) { const auto ref = StringRef{value}; out->insertData(ref.data, ref.size); },
[&](const size_t) { return def; });
}
/// For every id writes 1 into `out` when the key exists in the source and 0 otherwise.
///
/// The call is dispatched on the underlying type of the first attribute to the matching
/// has<T>() instantiation. Reads `attributes.front()`, so at least one attribute must exist.
void DirectDictionary::has(const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const
{
    const auto & attribute = attributes.front();

/// One switch label per underlying type: utTYPE -> has<TYPE>.
#define DISPATCH(TYPE) \
    case AttributeUnderlyingType::ut##TYPE: \
        has<TYPE>(attribute, ids, out); \
        break;

    switch (attribute.type)
    {
        DISPATCH(UInt8)
        DISPATCH(UInt16)
        DISPATCH(UInt32)
        DISPATCH(UInt64)
        DISPATCH(UInt128)
        DISPATCH(Int8)
        DISPATCH(Int16)
        DISPATCH(Int32)
        DISPATCH(Int64)
        DISPATCH(Float32)
        DISPATCH(Float64)
        DISPATCH(String)
        DISPATCH(Decimal32)
        DISPATCH(Decimal64)
        DISPATCH(Decimal128)
    }

#undef DISPATCH
}
/// Builds one Attribute descriptor per attribute of the dictionary structure and fills both
/// name <-> index maps.
///
/// If an attribute is marked hierarchical it is remembered in `hierarchical_attribute`;
/// such an attribute must be UInt64, otherwise TYPE_MISMATCH is thrown.
void DirectDictionary::createAttributes()
{
    /// Reserving up front means push_back never reallocates, so the pointer stored in
    /// `hierarchical_attribute` stays valid while the remaining attributes are appended.
    attributes.reserve(dict_struct.attributes.size());

    for (const auto & description : dict_struct.attributes)
    {
        const auto index = attributes.size();
        attribute_index_by_name.emplace(description.name, index);
        attribute_name_by_index.emplace(index, description.name);

        attributes.push_back(createAttributeWithType(description.underlying_type, description.null_value, description.name));

        if (!description.hierarchical)
            continue;

        hierarchical_attribute = &attributes.back();
        if (hierarchical_attribute->type != AttributeUnderlyingType::utUInt64)
            throw Exception{full_name + ": hierarchical attribute must be UInt64.", ErrorCodes::TYPE_MISMATCH};
    }
}
void DirectDictionary::calculateBytesAllocated()
{
bytes_allocated += attributes.size() * sizeof(attributes.front());
for (const auto & attribute : attributes)
{
if (attribute.type == AttributeUnderlyingType::utString)
bytes_allocated += sizeof(Arena) + attribute.string_arena->size();
}
}
/// Stores the attribute's null value, converted from the Field representation to `T`.
/// Generic version for the non-String attribute types.
template <typename T>
void DirectDictionary::createAttributeImpl(Attribute & attribute, const Field & null_value)
{
    using FieldValueType = NearestFieldType<T>;
    attribute.null_values = T(null_value.get<FieldValueType>());
}
/// String specialisation: the null value is copied into an arena owned by the attribute and
/// stored as a StringRef that points into that arena.
template <>
void DirectDictionary::createAttributeImpl<String>(Attribute & attribute, const Field & null_value)
{
    attribute.string_arena = std::make_unique<Arena>();

    const String & default_text = null_value.get<String>();
    const auto length = default_text.size();
    const char * const arena_copy = attribute.string_arena->insert(default_text.data(), length);

    attribute.null_values.emplace<StringRef>(arena_copy, length);
}
/// Creates the descriptor of a single attribute.
///
/// @param type        Underlying type; selects which createAttributeImpl<T> stores the null value.
/// @param null_value  Null value of the attribute as a Field.
/// @param attr_name   Attribute name, stored in the descriptor.
/// @return The descriptor with `type`, `name` and `null_values` filled in (plus `string_arena`
///         for String attributes).
DirectDictionary::Attribute DirectDictionary::createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value, const std::string & attr_name)
{
    Attribute attr{type, {}, {}, attr_name};

/// One switch label per underlying type: utTYPE -> createAttributeImpl<TYPE>.
#define DISPATCH(TYPE) \
    case AttributeUnderlyingType::ut##TYPE: \
        createAttributeImpl<TYPE>(attr, null_value); \
        break;

    switch (type)
    {
        DISPATCH(UInt8)
        DISPATCH(UInt16)
        DISPATCH(UInt32)
        DISPATCH(UInt64)
        DISPATCH(UInt128)
        DISPATCH(Int8)
        DISPATCH(Int16)
        DISPATCH(Int32)
        DISPATCH(Int64)
        DISPATCH(Float32)
        DISPATCH(Float64)
        DISPATCH(String)
        DISPATCH(Decimal32)
        DISPATCH(Decimal64)
        DISPATCH(Decimal128)
    }

#undef DISPATCH

    return attr;
}
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
void DirectDictionary::getItemsImpl(
const Attribute & attribute, const PaddedPODArray<Key> & ids, ValueSetter && set_value, DefaultGetter && get_default) const
{
const auto rows = ext::size(ids);
std::vector<bool> is_found(rows, false);
auto stream = source_ptr->loadAll();
stream->readPrefix();
while (const auto block = stream->read())
{
const IColumn & id_column = *block.safeGetByPosition(0).column;
for (const size_t attribute_idx : ext::range(0, attributes.size()))
{
const IColumn & attribute_column = *block.safeGetByPosition(attribute_idx + 1).column;
for (const auto row_idx : ext::range(0, id_column.size()))
{
const auto key = id_column[row_idx].get<UInt64>();
for (const auto row : ext::range(0, rows))
{
if (key == ids[row] && attribute.name == attribute_name_by_index.at(attribute_idx))
{
is_found[row] = true;
if (attribute.type == AttributeUnderlyingType::utFloat32)
{
set_value(row, static_cast<Float32>(attribute_column[row_idx].get<Float64>()));
}
else
{
set_value(row, static_cast<OutputType>(attribute_column[row_idx].get<AttributeType>()));
}
}
}
}
}
}
stream->readSuffix();
for (const auto row : ext::range(0, rows))
if (!is_found[row])
set_value(row, get_default(row));
query_count.fetch_add(rows, std::memory_order_relaxed);
}
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
void DirectDictionary::getItemsStringImpl(
const Attribute & attribute, const PaddedPODArray<Key> & ids, ValueSetter && set_value, DefaultGetter && get_default) const
{
const auto rows = ext::size(ids);
std::vector<bool> is_found(rows, false);
for (const auto row : ext::range(0, rows))
{
auto stream = source_ptr->loadAll();
stream->readPrefix();
while (const auto block = stream->read())
{
const IColumn & id_column = *block.safeGetByPosition(0).column;
for (const size_t attribute_idx : ext::range(0, attributes.size()))
{
const IColumn & attribute_column = *block.safeGetByPosition(attribute_idx + 1).column;
for (const auto row_idx : ext::range(0, id_column.size()))
{
const auto key = id_column[row_idx].get<UInt64>();
if (key == ids[row] && attribute.name == attribute_name_by_index.at(attribute_idx))
{
is_found[row] = true;
const String from_source = attribute_column[row_idx].get<String>();
set_value(row, from_source);
}
}
}
}
stream->readSuffix();
}
for (const auto row : ext::range(0, rows))
if (!is_found[row])
set_value(row, get_default(row));
query_count.fetch_add(rows, std::memory_order_relaxed);
}
/// Returns the descriptor of the attribute called `attribute_name`.
/// Throws BAD_ARGUMENTS when the dictionary has no attribute with that name.
const DirectDictionary::Attribute & DirectDictionary::getAttribute(const std::string & attribute_name) const
{
    const auto found = attribute_index_by_name.find(attribute_name);

    if (found != attribute_index_by_name.end())
        return attributes[found->second];

    throw Exception{full_name + ": no such attribute '" + attribute_name + "'", ErrorCodes::BAD_ARGUMENTS};
}
/// Key-existence check: sets `out[row]` to 1 when `ids[row]` occurs in the key column of the
/// source and to 0 otherwise. The source is read once per call.
///
/// Neither the template parameter nor the attribute argument influences the result; only the
/// key column (column 0 of every block) is inspected.
template <typename T>
void DirectDictionary::has(const Attribute &, const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const
{
    const auto rows = ext::size(ids);

    /// Start from "not found" everywhere; matches below flip single rows to 1.
    for (size_t row = 0; row < rows; ++row)
        out[row] = 0;

    auto stream = source_ptr->loadAll();
    stream->readPrefix();

    while (const auto block = stream->read())
    {
        const IColumn & id_column = *block.safeGetByPosition(0).column;
        const size_t block_rows = id_column.size();

        for (size_t row_idx = 0; row_idx < block_rows; ++row_idx)
        {
            const auto source_key = id_column[row_idx].get<UInt64>();

            for (size_t row = 0; row < rows; ++row)
            {
                if (ids[row] == source_key)
                    out[row] = 1;
            }
        }
    }

    stream->readSuffix();

    query_count.fetch_add(rows, std::memory_order_relaxed);
}
/// Reads the whole source and returns every value of its key column (column 0), in the order
/// the source delivers them. Duplicates are not removed.
PaddedPODArray<DirectDictionary::Key> DirectDictionary::getIds() const
{
    PaddedPODArray<Key> collected;

    auto stream = source_ptr->loadAll();
    stream->readPrefix();

    while (const auto block = stream->read())
    {
        const IColumn & id_column = *block.safeGetByPosition(0).column;
        const size_t block_rows = id_column.size();

        for (size_t row_idx = 0; row_idx < block_rows; ++row_idx)
            collected.push_back(id_column[row_idx].get<UInt64>());
    }

    stream->readSuffix();

    return collected;
}
/// Creates a stream over the dictionary contents: all keys currently present in the source
/// (see getIds()) are handed to a DictionaryBlockInputStream together with the requested
/// column names and the maximum block size.
BlockInputStreamPtr DirectDictionary::getBlockInputStream(const Names & column_names, size_t max_block_size) const
{
    return std::make_shared<DictionaryBlockInputStream<DirectDictionary, Key>>(
        shared_from_this(), max_block_size, getIds(), column_names);
}
/// Registers the "direct" layout in the dictionary factory.
///
/// The creator rejects structures that define a complex 'key' (UNSUPPORTED_METHOD) or
/// range_min / range_max (BAD_ARGUMENTS). Otherwise it reads the database name (optional,
/// defaults to an empty string), the dictionary name and the lifetime from the configuration
/// and builds a DirectDictionary on top of the given source.
void registerDictionaryDirect(DictionaryFactory & factory)
{
    auto create_layout = [](const std::string & full_name,
                            const DictionaryStructure & dict_struct,
                            const Poco::Util::AbstractConfiguration & config,
                            const std::string & config_prefix,
                            DictionarySourcePtr source_ptr) -> DictionaryPtr
    {
        if (dict_struct.key)
            throw Exception{"'key' is not supported for dictionary of layout 'direct'", ErrorCodes::UNSUPPORTED_METHOD};

        const bool has_range_bounds = dict_struct.range_min || dict_struct.range_max;
        if (has_range_bounds)
            throw Exception{full_name
                                + ": elements .structure.range_min and .structure.range_max should be defined only "
                                  "for a dictionary of layout 'range_hashed'",
                            ErrorCodes::BAD_ARGUMENTS};

        const String database = config.getString(config_prefix + ".database", "");
        const String name = config.getString(config_prefix + ".name");
        const DictionaryLifetime dict_lifetime{config, config_prefix + ".lifetime"};

        return std::make_unique<DirectDictionary>(database, name, dict_struct, std::move(source_ptr), dict_lifetime);
    };

    factory.registerLayout("direct", create_layout, false);
}
}

View File

@ -0,0 +1,236 @@
#pragma once
#include <atomic>
#include <variant>
#include <vector>
#include <Columns/ColumnDecimal.h>
#include <Columns/ColumnString.h>
#include <Common/Arena.h>
#include <Core/Block.h>
#include <ext/range.h>
#include <ext/size.h>
#include "DictionaryStructure.h"
#include "IDictionary.h"
#include "IDictionarySource.h"
namespace DB
{
using BlockPtr = std::shared_ptr<Block>;
/// Dictionary with the "direct" layout: it does not keep the data in memory. Every request
/// (get*, has, toParent, isIn*, getBlockInputStream) reads the source again via loadAll() and
/// searches the returned blocks for the requested UInt64 keys.
///
/// Only the attribute descriptors (type, name, null value) are stored in the object.
class DirectDictionary final : public IDictionary
{
public:
    /// @param database_      Database name; may be empty.
    /// @param name_          Dictionary name.
    /// @param dict_struct_   Description of the key and attributes.
    /// @param source_ptr_    Source queried on every request.
    /// @param dict_lifetime_ Lifetime settings.
    /// @param saved_block_   Optional block, stored and passed on by clone().
    DirectDictionary(
        const std::string & database_,
        const std::string & name_,
        const DictionaryStructure & dict_struct_,
        DictionarySourcePtr source_ptr_,
        const DictionaryLifetime dict_lifetime_,
        BlockPtr saved_block_ = nullptr);

    const std::string & getDatabase() const override { return database; }
    const std::string & getName() const override { return name; }
    const std::string & getFullName() const override { return full_name; }

    std::string getTypeName() const override { return "Direct"; }

    size_t getBytesAllocated() const override { return bytes_allocated; }

    /// Number of rows requested so far; the counter is increased by every lookup.
    size_t getQueryCount() const override { return query_count.load(std::memory_order_relaxed); }

    double getHitRate() const override { return 1.0; }

    size_t getElementCount() const override { return element_count; }

    /// Ratio of stored elements to buckets. `bucket_count` starts at zero, so the division is
    /// guarded: without buckets the load factor is reported as 0 instead of NaN.
    double getLoadFactor() const override
    {
        return bucket_count ? static_cast<double>(element_count) / bucket_count : 0.0;
    }

    /// Creates an independent dictionary with the same settings and a cloned source.
    std::shared_ptr<const IExternalLoadable> clone() const override
    {
        return std::make_shared<DirectDictionary>(database, name, dict_struct, source_ptr->clone(), dict_lifetime, saved_block);
    }

    const IDictionarySource * getSource() const override { return source_ptr.get(); }

    const DictionaryLifetime & getLifetime() const override { return dict_lifetime; }

    const DictionaryStructure & getStructure() const override { return dict_struct; }

    /// The position of the descriptor inside `attributes` equals the position of the attribute
    /// in `dict_struct.attributes`, which is where the `injective` flag is stored.
    bool isInjective(const std::string & attribute_name) const override
    {
        return dict_struct.attributes[&getAttribute(attribute_name) - attributes.data()].injective;
    }

    bool hasHierarchy() const override { return hierarchical_attribute; }

    /// Writes the parent id of every id into `out`; see the definition for details.
    void toParent(const PaddedPODArray<Key> & ids, PaddedPODArray<Key> & out) const override;

    /// Row-wise "is child a descendant of (or equal to) ancestor" checks for the three
    /// combinations of array and constant arguments.
    void isInVectorVector(
        const PaddedPODArray<Key> & child_ids, const PaddedPODArray<Key> & ancestor_ids, PaddedPODArray<UInt8> & out) const override;
    void isInVectorConstant(const PaddedPODArray<Key> & child_ids, const Key ancestor_id, PaddedPODArray<UInt8> & out) const override;
    void isInConstantVector(const Key child_id, const PaddedPODArray<Key> & ancestor_ids, PaddedPODArray<UInt8> & out) const override;

    /// Output container for a typed getter: a decimal array for decimal types, a plain array otherwise.
    template <typename T>
    using ResultArrayType = std::conditional_t<IsDecimalNumber<T>, DecimalPaddedPODArray<T>, PaddedPODArray<T>>;

    /// Typed getters; ids missing from the source receive the attribute's null value.
#define DECLARE(TYPE) \
    void get##TYPE(const std::string & attribute_name, const PaddedPODArray<Key> & ids, ResultArrayType<TYPE> & out) const;
    DECLARE(UInt8)
    DECLARE(UInt16)
    DECLARE(UInt32)
    DECLARE(UInt64)
    DECLARE(UInt128)
    DECLARE(Int8)
    DECLARE(Int16)
    DECLARE(Int32)
    DECLARE(Int64)
    DECLARE(Float32)
    DECLARE(Float64)
    DECLARE(Decimal32)
    DECLARE(Decimal64)
    DECLARE(Decimal128)
#undef DECLARE

    void getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, ColumnString * out) const;

    /// Typed getters with a per-row default: ids missing from the source receive `def[row]`.
#define DECLARE(TYPE) \
    void get##TYPE( \
        const std::string & attribute_name, \
        const PaddedPODArray<Key> & ids, \
        const PaddedPODArray<TYPE> & def, \
        ResultArrayType<TYPE> & out) const;
    DECLARE(UInt8)
    DECLARE(UInt16)
    DECLARE(UInt32)
    DECLARE(UInt64)
    DECLARE(UInt128)
    DECLARE(Int8)
    DECLARE(Int16)
    DECLARE(Int32)
    DECLARE(Int64)
    DECLARE(Float32)
    DECLARE(Float64)
    DECLARE(Decimal32)
    DECLARE(Decimal64)
    DECLARE(Decimal128)
#undef DECLARE

    void
    getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, const ColumnString * const def, ColumnString * const out)
        const;

    /// Typed getters with a constant default: ids missing from the source receive `def`.
#define DECLARE(TYPE) \
    void get##TYPE(const std::string & attribute_name, const PaddedPODArray<Key> & ids, const TYPE def, ResultArrayType<TYPE> & out) const;
    DECLARE(UInt8)
    DECLARE(UInt16)
    DECLARE(UInt32)
    DECLARE(UInt64)
    DECLARE(UInt128)
    DECLARE(Int8)
    DECLARE(Int16)
    DECLARE(Int32)
    DECLARE(Int64)
    DECLARE(Float32)
    DECLARE(Float64)
    DECLARE(Decimal32)
    DECLARE(Decimal64)
    DECLARE(Decimal128)
#undef DECLARE

    void getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, const String & def, ColumnString * const out) const;

    /// Sets `out[row]` to 1 when `ids[row]` is present in the source, 0 otherwise.
    void has(const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const override;

    BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override;

private:
    template <typename Value>
    using ContainerType = PaddedPODArray<Value>;

    /// Descriptor of one attribute. No attribute values are stored, only metadata.
    struct Attribute final
    {
        AttributeUnderlyingType type;
        /// Null (default) value of the attribute, held in the alternative matching `type`.
        std::variant<
            UInt8,
            UInt16,
            UInt32,
            UInt64,
            UInt128,
            Int8,
            Int16,
            Int32,
            Int64,
            Decimal32,
            Decimal64,
            Decimal128,
            Float32,
            Float64,
            StringRef>
            null_values;
        /// Owns the bytes referenced by a StringRef null value; created for String attributes.
        std::unique_ptr<Arena> string_arena;
        std::string name;
    };

    void createAttributes();

    /// NOTE(review): no definition of addAttributeSize is visible next to the other member
    /// definitions; confirm whether this declaration is still needed.
    template <typename T>
    void addAttributeSize(const Attribute & attribute);

    void calculateBytesAllocated();

    template <typename T>
    void createAttributeImpl(Attribute & attribute, const Field & null_value);

    Attribute createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value, const std::string & name);

    template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
    void getItemsStringImpl(
        const Attribute & attribute, const PaddedPODArray<Key> & ids, ValueSetter && set_value, DefaultGetter && get_default) const;

    template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
    void getItemsImpl(
        const Attribute & attribute, const PaddedPODArray<Key> & ids, ValueSetter && set_value, DefaultGetter && get_default) const;

    /// NOTE(review): resize / setAttributeValueImpl / setAttributeValue likewise have no visible
    /// definition; confirm whether these declarations are still needed.
    template <typename T>
    void resize(Attribute & attribute, const Key id);

    template <typename T>
    void setAttributeValueImpl(Attribute & attribute, const Key id, const T & value);

    void setAttributeValue(Attribute & attribute, const Key id, const Field & value);

    const Attribute & getAttribute(const std::string & attribute_name) const;

    template <typename T>
    void has(const Attribute & attribute, const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const;

    Key getValueOrNullByKey(const Key & to_find) const;

    template <typename ChildType, typename AncestorType>
    void isInImpl(const ChildType & child_ids, const AncestorType & ancestor_ids, PaddedPODArray<UInt8> & out) const;

    PaddedPODArray<Key> getIds() const;

    const std::string database;
    const std::string name;
    const std::string full_name;
    const DictionaryStructure dict_struct;
    const DictionarySourcePtr source_ptr;
    const DictionaryLifetime dict_lifetime;

    /// Both maps are filled by createAttributes(); indices refer to positions in `attributes`.
    std::map<std::string, size_t> attribute_index_by_name;
    std::map<size_t, std::string> attribute_name_by_index;
    std::vector<Attribute> attributes;
    /// Points into `attributes`; nullptr when the dictionary has no hierarchical attribute.
    const Attribute * hierarchical_attribute = nullptr;
    std::unique_ptr<Arena> temp_arena;

    size_t bytes_allocated = 0;
    size_t element_count = 0;
    size_t bucket_count = 0;
    mutable std::atomic<size_t> query_count{0};

    BlockPtr saved_block;
};
}

View File

@ -32,6 +32,7 @@ void registerDictionaries()
registerDictionaryHashed(factory);
registerDictionaryCache(factory);
registerDictionaryPolygon(factory);
registerDictionaryDirect(factory);
}
}

View File

@ -25,6 +25,7 @@ void registerDictionaryFlat(DictionaryFactory & factory);
void registerDictionaryHashed(DictionaryFactory & factory);
void registerDictionaryCache(DictionaryFactory & factory);
void registerDictionaryPolygon(DictionaryFactory & factory);
void registerDictionaryDirect(DictionaryFactory & factory);
void registerDictionaries();
}

View File

@ -34,6 +34,7 @@
#include <Dictionaries/RangeHashedDictionary.h>
#include <Dictionaries/TrieDictionary.h>
#include <Dictionaries/PolygonDictionary.h>
#include <Dictionaries/DirectDictionary.h>
#include <ext/range.h>
@ -138,7 +139,8 @@ private:
#if !defined(ARCADIA_BUILD)
!executeDispatchComplex<TrieDictionary>(block, arguments, result, dict_ptr) &&
#endif
!executeDispatchComplex<SimplePolygonDictionary>(block, arguments, result, dict_ptr))
!executeDispatchComplex<SimplePolygonDictionary>(block, arguments, result, dict_ptr) &&
!executeDispatchSimple<DirectDictionary>(block, arguments, result, dict_ptr))
throw Exception{"Unsupported dictionary type " + dict_ptr->getTypeName(), ErrorCodes::UNKNOWN_TYPE};
}
@ -304,6 +306,7 @@ private:
context.checkAccess(AccessType::dictGet, dict_ptr->getDatabaseOrNoDatabaseTag(), dict_ptr->getName());
if (!executeDispatch<FlatDictionary>(block, arguments, result, dict_ptr) &&
!executeDispatch<DirectDictionary>(block, arguments, result, dict_ptr) &&
!executeDispatch<HashedDictionary>(block, arguments, result, dict_ptr) &&
!executeDispatch<CacheDictionary>(block, arguments, result, dict_ptr) &&
!executeDispatchComplex<ComplexKeyHashedDictionary>(block, arguments, result, dict_ptr) &&
@ -488,6 +491,7 @@ private:
context.checkAccess(AccessType::dictGet, dict_ptr->getDatabaseOrNoDatabaseTag(), dict_ptr->getName());
if (!executeDispatch<FlatDictionary>(block, arguments, result, dict_ptr) &&
!executeDispatch<DirectDictionary>(block, arguments, result, dict_ptr) &&
!executeDispatch<HashedDictionary>(block, arguments, result, dict_ptr) &&
!executeDispatch<CacheDictionary>(block, arguments, result, dict_ptr) &&
!executeDispatchComplex<ComplexKeyHashedDictionary>(block, arguments, result, dict_ptr) &&
@ -828,6 +832,7 @@ private:
context.checkAccess(AccessType::dictGet, dict_ptr->getDatabaseOrNoDatabaseTag(), dict_ptr->getName());
if (!executeDispatch<FlatDictionary>(block, arguments, result, dict_ptr) &&
!executeDispatch<DirectDictionary>(block, arguments, result, dict_ptr) &&
!executeDispatch<HashedDictionary>(block, arguments, result, dict_ptr) &&
!executeDispatch<CacheDictionary>(block, arguments, result, dict_ptr) &&
!executeDispatchComplex<ComplexKeyHashedDictionary>(block, arguments, result, dict_ptr) &&
@ -1090,6 +1095,7 @@ private:
context.checkAccess(AccessType::dictGet, dict_ptr->getDatabaseOrNoDatabaseTag(), dict_ptr->getName());
if (!executeDispatch<FlatDictionary>(block, arguments, result, dict_ptr) &&
!executeDispatch<DirectDictionary>(block, arguments, result, dict_ptr) &&
!executeDispatch<HashedDictionary>(block, arguments, result, dict_ptr) &&
!executeDispatch<CacheDictionary>(block, arguments, result, dict_ptr) &&
!executeDispatchComplex<ComplexKeyHashedDictionary>(block, arguments, result, dict_ptr) &&
@ -1665,6 +1671,7 @@ private:
context.checkAccess(AccessType::dictGet, dict_ptr->getDatabaseOrNoDatabaseTag(), dict_ptr->getName());
if (!executeDispatch<FlatDictionary>(block, arguments, result, dict_ptr) &&
!executeDispatch<DirectDictionary>(block, arguments, result, dict_ptr) &&
!executeDispatch<HashedDictionary>(block, arguments, result, dict_ptr) &&
!executeDispatch<CacheDictionary>(block, arguments, result, dict_ptr))
throw Exception{"Unsupported dictionary type " + dict_ptr->getTypeName(), ErrorCodes::UNKNOWN_TYPE};
@ -1829,6 +1836,7 @@ private:
context.checkAccess(AccessType::dictGet, dict_ptr->getDatabaseOrNoDatabaseTag(), dict_ptr->getName());
if (!executeDispatch<FlatDictionary>(block, arguments, result, dict_ptr)
&& !executeDispatch<DirectDictionary>(block, arguments, result, dict_ptr)
&& !executeDispatch<HashedDictionary>(block, arguments, result, dict_ptr)
&& !executeDispatch<CacheDictionary>(block, arguments, result, dict_ptr))
throw Exception{"Unsupported dictionary type " + dict_ptr->getTypeName(), ErrorCodes::UNKNOWN_TYPE};

View File

@ -0,0 +1,45 @@
INITIALIZING DICTIONARY
[3,2,1]
1
0
1
0
2
0
2.3
1.6
0.5
London
Great Britain
NONE
1 Russia
2 Moscow
3 Center
4 Great Britain
5 London
6 NONE
7 NONE
8 NONE
9 NONE
10 NONE
1 0.5
2 1.6
3 2.3
4 0.2
5 4.9
6 0
7 0
8 0
9 0
10 0
Russia
Moscow
Center
Great Britain
London
NONE
NONE
NONE
NONE
NONE
END

View File

@ -0,0 +1,134 @@
-- Functional test for dictionaries with LAYOUT(DIRECT()).
-- Source tables live in database_for_dict, the dictionaries in ordinary_db.

-- Start from a clean database for the source tables.
DROP DATABASE IF EXISTS database_for_dict;
CREATE DATABASE database_for_dict Engine = Ordinary;
DROP TABLE IF EXISTS database_for_dict.table_for_dict1;
DROP TABLE IF EXISTS database_for_dict.table_for_dict2;
DROP TABLE IF EXISTS database_for_dict.table_for_dict3;
-- Source for dict1: a single row, used only by the error-case query at the end.
CREATE TABLE database_for_dict.table_for_dict1
(
key_column UInt64,
second_column UInt64,
third_column String
)
ENGINE = MergeTree()
ORDER BY key_column;
INSERT INTO database_for_dict.table_for_dict1 VALUES (100500, 10000000, 'Hello world');
-- Source for dict2: a region hierarchy (parent_region refers to region_id, 0 = no parent).
CREATE TABLE database_for_dict.table_for_dict2
(
region_id UInt64,
parent_region UInt64,
region_name String
)
ENGINE = MergeTree()
ORDER BY region_id;
INSERT INTO database_for_dict.table_for_dict2 VALUES (1, 0, 'Russia');
INSERT INTO database_for_dict.table_for_dict2 VALUES (2, 1, 'Moscow');
INSERT INTO database_for_dict.table_for_dict2 VALUES (3, 2, 'Center');
INSERT INTO database_for_dict.table_for_dict2 VALUES (4, 0, 'Great Britain');
INSERT INTO database_for_dict.table_for_dict2 VALUES (5, 4, 'London');
-- Source for dict3: same keys and names, but parent_region is a Float32 value (not a hierarchy).
CREATE TABLE database_for_dict.table_for_dict3
(
region_id UInt64,
parent_region Float32,
region_name String
)
ENGINE = MergeTree()
ORDER BY region_id;
INSERT INTO database_for_dict.table_for_dict3 VALUES (1, 0.5, 'Russia');
INSERT INTO database_for_dict.table_for_dict3 VALUES (2, 1.6, 'Moscow');
INSERT INTO database_for_dict.table_for_dict3 VALUES (3, 2.3, 'Center');
INSERT INTO database_for_dict.table_for_dict3 VALUES (4, 0.2, 'Great Britain');
INSERT INTO database_for_dict.table_for_dict3 VALUES (5, 4.9, 'London');
-- Start from a clean database for the dictionaries.
DROP DATABASE IF EXISTS ordinary_db;
CREATE DATABASE ordinary_db ENGINE = Ordinary;
DROP DICTIONARY IF EXISTS ordinary_db.dict1;
DROP DICTIONARY IF EXISTS ordinary_db.dict2;
DROP DICTIONARY IF EXISTS ordinary_db.dict3;
-- dict1 is created with max_result_bytes=1; the query against it near the end of the script
-- is expected to fail (see the serverError annotation there).
CREATE DICTIONARY ordinary_db.dict1
(
key_column UInt64 DEFAULT 0,
second_column UInt64 DEFAULT 1,
third_column String DEFAULT 'qqq'
)
PRIMARY KEY key_column
SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'table_for_dict1' PASSWORD '' DB 'database_for_dict'))
LIFETIME(MIN 1 MAX 600)
LAYOUT(DIRECT()) SETTINGS(max_result_bytes=1);
-- dict2: hierarchical dictionary, parent_region is the HIERARCHICAL attribute.
CREATE DICTIONARY ordinary_db.dict2
(
region_id UInt64 DEFAULT 0,
parent_region UInt64 DEFAULT 0 HIERARCHICAL,
region_name String DEFAULT ''
)
PRIMARY KEY region_id
SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'table_for_dict2' PASSWORD '' DB 'database_for_dict'))
LIFETIME(MIN 1 MAX 600)
LAYOUT(DIRECT());
-- dict3: non-hierarchical dictionary with a Float32 attribute.
CREATE DICTIONARY ordinary_db.dict3
(
region_id UInt64 DEFAULT 0,
parent_region Float32 DEFAULT 0,
region_name String DEFAULT ''
)
PRIMARY KEY region_id
SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'table_for_dict3' PASSWORD '' DB 'database_for_dict'))
LIFETIME(MIN 1 MAX 600)
LAYOUT(DIRECT());
SELECT 'INITIALIZING DICTIONARY';
-- Hierarchy functions on dict2.
SELECT dictGetHierarchy('ordinary_db.dict2', toUInt64(3));
SELECT dictHas('ordinary_db.dict2', toUInt64(3));
SELECT dictHas('ordinary_db.dict2', toUInt64(45));
SELECT dictIsIn('ordinary_db.dict2', toUInt64(3), toUInt64(1));
SELECT dictIsIn('ordinary_db.dict2', toUInt64(1), toUInt64(3));
-- Typed getters: existing key, then a missing key (99) that yields the attribute default.
SELECT dictGetUInt64('ordinary_db.dict2', 'parent_region', toUInt64(3));
SELECT dictGetUInt64('ordinary_db.dict2', 'parent_region', toUInt64(99));
SELECT dictGetFloat32('ordinary_db.dict3', 'parent_region', toUInt64(3));
SELECT dictGetFloat32('ordinary_db.dict3', 'parent_region', toUInt64(2));
SELECT dictGetFloat32('ordinary_db.dict3', 'parent_region', toUInt64(1));
SELECT dictGetString('ordinary_db.dict2', 'region_name', toUInt64(5));
SELECT dictGetString('ordinary_db.dict2', 'region_name', toUInt64(4));
SELECT dictGetStringOrDefault('ordinary_db.dict2', 'region_name', toUInt64(100), 'NONE');
-- Batch lookups over keys 1..10: keys 1-5 exist in the source, keys 6-10 fall back to the default.
SELECT number + 1, dictGetStringOrDefault('ordinary_db.dict2', 'region_name', toUInt64(number + 1), 'NONE') chars FROM numbers(10);
SELECT number + 1, dictGetFloat32OrDefault('ordinary_db.dict3', 'parent_region', toUInt64(number + 1), toFloat32(0)) chars FROM numbers(10);
-- The same string lookups, one key per query.
SELECT dictGetStringOrDefault('ordinary_db.dict2', 'region_name', toUInt64(1), 'NONE');
SELECT dictGetStringOrDefault('ordinary_db.dict2', 'region_name', toUInt64(2), 'NONE');
SELECT dictGetStringOrDefault('ordinary_db.dict2', 'region_name', toUInt64(3), 'NONE');
SELECT dictGetStringOrDefault('ordinary_db.dict2', 'region_name', toUInt64(4), 'NONE');
SELECT dictGetStringOrDefault('ordinary_db.dict2', 'region_name', toUInt64(5), 'NONE');
SELECT dictGetStringOrDefault('ordinary_db.dict2', 'region_name', toUInt64(6), 'NONE');
SELECT dictGetStringOrDefault('ordinary_db.dict2', 'region_name', toUInt64(7), 'NONE');
SELECT dictGetStringOrDefault('ordinary_db.dict2', 'region_name', toUInt64(8), 'NONE');
SELECT dictGetStringOrDefault('ordinary_db.dict2', 'region_name', toUInt64(9), 'NONE');
SELECT dictGetStringOrDefault('ordinary_db.dict2', 'region_name', toUInt64(10), 'NONE');
-- Error case: dict1 was created with max_result_bytes=1, so this lookup must fail with
-- server error 396 (presumably the result-size limit being exceeded; confirm against the error code list).
SELECT dictGetUInt64('ordinary_db.dict1', 'second_column', toUInt64(100500)); -- { serverError 396 }
SELECT 'END';
-- Cleanup: dictionaries first, then their source tables and databases.
DROP DICTIONARY IF EXISTS ordinary_db.dict1;
DROP DICTIONARY IF EXISTS ordinary_db.dict2;
DROP DICTIONARY IF EXISTS ordinary_db.dict3;
DROP DATABASE IF EXISTS ordinary_db;
DROP TABLE IF EXISTS database_for_dict.table_for_dict1;
DROP TABLE IF EXISTS database_for_dict.table_for_dict2;
DROP TABLE IF EXISTS database_for_dict.table_for_dict3;
DROP DATABASE IF EXISTS database_for_dict;