dbms: refactor FlatDictionary to use tuple and less macro-code [#METR-13298]

This commit is contained in:
Andrey Mironov 2015-02-27 14:57:14 +03:00
parent ca1c0756c2
commit 3f34c733dd
4 changed files with 187 additions and 235 deletions

View File

@ -52,15 +52,25 @@ public:
const DictionaryLifetime & getLifetime() const override { return dict_lifetime; } const DictionaryLifetime & getLifetime() const override { return dict_lifetime; }
bool hasHierarchy() const override { return false; } bool hasHierarchy() const override { return hierarchical_attribute; }
id_t toParent(const id_t id) const override { return 0; } id_t toParent(const id_t id) const override
{
PODArray<UInt64> ids{1, id};
PODArray<UInt64> out{1};
getItems<UInt64>(*hierarchical_attribute, ids, out);
return out.front();
}
/// Batch overload of toParent: forwards `ids` and `out` to getItems<UInt64> for the attribute
/// that hierarchical_attribute points to.
/// NOTE(review): hierarchical_attribute is dereferenced without a null check; presumably callers
/// must make sure hasHierarchy() is true before calling this — confirm against call sites.
void toParent(const PODArray<id_t> & ids, PODArray<id_t> & out) const override
{
getItems<UInt64>(*hierarchical_attribute, ids, out);
}
#define DECLARE_INDIVIDUAL_GETTER(TYPE, LC_TYPE) \ #define DECLARE_INDIVIDUAL_GETTER(TYPE, LC_TYPE) \
TYPE get##TYPE(const std::string & attribute_name, const id_t id) const override\ TYPE get##TYPE(const std::string & attribute_name, const id_t id) const override\
{\ {\
const auto idx = getAttributeIndex(attribute_name);\ auto & attribute = getAttribute(attribute_name);\
const auto & attribute = attributes[idx];\
if (attribute.type != AttributeType::LC_TYPE)\ if (attribute.type != AttributeType::LC_TYPE)\
throw Exception{\ throw Exception{\
"Type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type),\ "Type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type),\
@ -69,7 +79,7 @@ public:
\ \
PODArray<UInt64> ids{1, id};\ PODArray<UInt64> ids{1, id};\
PODArray<TYPE> out{1};\ PODArray<TYPE> out{1};\
getItems<TYPE>(idx, ids, out);\ getItems<TYPE>(attribute, ids, out);\
return out.front();\ return out.front();\
} }
DECLARE_INDIVIDUAL_GETTER(UInt8, uint8) DECLARE_INDIVIDUAL_GETTER(UInt8, uint8)
@ -85,8 +95,7 @@ public:
#undef DECLARE_INDIVIDUAL_GETTER #undef DECLARE_INDIVIDUAL_GETTER
String getString(const std::string & attribute_name, const id_t id) const override String getString(const std::string & attribute_name, const id_t id) const override
{ {
const auto idx = getAttributeIndex(attribute_name); auto & attribute = getAttribute(attribute_name);
const auto & attribute = attributes[idx];
if (attribute.type != AttributeType::string) if (attribute.type != AttributeType::string)
throw Exception{ throw Exception{
"Type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), "Type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type),
@ -95,7 +104,7 @@ public:
PODArray<UInt64> ids{1, id}; PODArray<UInt64> ids{1, id};
ColumnString out; ColumnString out;
getItems(idx, ids, &out); getItems(attribute, ids, &out);
return out.getDataAt(0); return out.getDataAt(0);
} }
@ -103,15 +112,14 @@ public:
#define DECLARE_MULTIPLE_GETTER(TYPE, LC_TYPE)\ #define DECLARE_MULTIPLE_GETTER(TYPE, LC_TYPE)\
void get##TYPE(const std::string & attribute_name, const PODArray<id_t> & ids, PODArray<TYPE> & out) const override\ void get##TYPE(const std::string & attribute_name, const PODArray<id_t> & ids, PODArray<TYPE> & out) const override\
{\ {\
const auto idx = getAttributeIndex(attribute_name);\ auto & attribute = getAttribute(attribute_name);\
const auto & attribute = attributes[idx];\
if (attribute.type != AttributeType::LC_TYPE)\ if (attribute.type != AttributeType::LC_TYPE)\
throw Exception{\ throw Exception{\
"Type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type),\ "Type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type),\
ErrorCodes::TYPE_MISMATCH\ ErrorCodes::TYPE_MISMATCH\
};\ };\
\ \
getItems<TYPE>(idx, ids, out);\ getItems<TYPE>(attribute, ids, out);\
} }
DECLARE_MULTIPLE_GETTER(UInt8, uint8) DECLARE_MULTIPLE_GETTER(UInt8, uint8)
DECLARE_MULTIPLE_GETTER(UInt16, uint16) DECLARE_MULTIPLE_GETTER(UInt16, uint16)
@ -126,15 +134,14 @@ public:
#undef DECLARE_MULTIPLE_GETTER #undef DECLARE_MULTIPLE_GETTER
void getString(const std::string & attribute_name, const PODArray<id_t> & ids, ColumnString * out) const override void getString(const std::string & attribute_name, const PODArray<id_t> & ids, ColumnString * out) const override
{ {
const auto idx = getAttributeIndex(attribute_name); auto & attribute = getAttribute(attribute_name);
const auto & attribute = attributes[idx];
if (attribute.type != AttributeType::string) if (attribute.type != AttributeType::string)
throw Exception{ throw Exception{
"Type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), "Type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type),
ErrorCodes::TYPE_MISMATCH ErrorCodes::TYPE_MISMATCH
}; };
getItems(idx, ids, out); getItems(attribute, ids, out);
} }
private: private:
@ -176,7 +183,15 @@ private:
attribute.null_value)); attribute.null_value));
if (attribute.hierarchical) if (attribute.hierarchical)
{
hierarchical_attribute = &attributes.back(); hierarchical_attribute = &attributes.back();
if (hierarchical_attribute->type != AttributeType::uint64)
throw Exception{
"Hierarchical attribute must be UInt64.",
ErrorCodes::TYPE_MISMATCH
};
}
} }
} }
@ -241,14 +256,14 @@ private:
} }
template <typename T> template <typename T>
void getItems(const std::size_t attribute_idx, const PODArray<id_t> & ids, PODArray<T> & out) const void getItems(attribute_t & attribute, const PODArray<id_t> & ids, PODArray<T> & out) const
{ {
HashMap<id_t, std::vector<std::size_t>> outdated_ids; HashMap<id_t, std::vector<std::size_t>> outdated_ids;
auto & attribute = attributes[attribute_idx];
auto & attribute_array = std::get<std::unique_ptr<T[]>>(attribute.arrays); auto & attribute_array = std::get<std::unique_ptr<T[]>>(attribute.arrays);
{ {
const Poco::ScopedReadRWLock read_lock{rw_lock}; const Poco::ScopedReadRWLock read_lock{rw_lock};
/// fetch up-to-date values, decide which ones require update /// fetch up-to-date values, decide which ones require update
for (const auto i : ext::range(0, ids.size())) for (const auto i : ext::range(0, ids.size()))
{ {
@ -289,12 +304,11 @@ private:
}); });
} }
void getItems(const std::size_t attribute_idx, const PODArray<id_t> & ids, ColumnString * out) const void getItems(attribute_t & attribute, const PODArray<id_t> & ids, ColumnString * out) const
{ {
/// save on some allocations /// save on some allocations
out->getOffsets().reserve(ids.size()); out->getOffsets().reserve(ids.size());
auto & attribute = attributes[attribute_idx];
auto & attribute_array = std::get<std::unique_ptr<StringRef[]>>(attribute.arrays); auto & attribute_array = std::get<std::unique_ptr<StringRef[]>>(attribute.arrays);
auto found_outdated_values = false; auto found_outdated_values = false;
@ -381,7 +395,7 @@ private:
const auto attribute_value = attribute_array[cell_idx]; const auto attribute_value = attribute_array[cell_idx];
map[id] = attribute_value; map[id] = attribute_value;
total_length += attribute_value.size + 1; total_length += (attribute_value.size + 1) * outdated_ids[id];
}); });
} }
@ -390,7 +404,7 @@ private:
for (const auto id : ids) for (const auto id : ids)
{ {
const auto it = map.find(id); const auto it = map.find(id);
const auto string = it != map.end() ? it->second : std::get<String>(attributes[attribute_idx].null_values); const auto string = it != map.end() ? it->second : std::get<String>(attribute.null_values);
out->insertData(string.data(), string.size()); out->insertData(string.data(), string.size());
} }
} }
@ -414,6 +428,11 @@ private:
const auto & ids = id_column->getData(); const auto & ids = id_column->getData();
/// cache column pointers
std::vector<const IColumn *> column_ptrs(attributes.size());
for (const auto i : ext::range(0, attributes.size()))
column_ptrs[i] = block.getByPosition(i + 1).column.get();
for (const auto i : ext::range(0, ids.size())) for (const auto i : ext::range(0, ids.size()))
{ {
const auto id = ids[i]; const auto id = ids[i];
@ -422,7 +441,7 @@ private:
for (const auto attribute_idx : ext::range(0, attributes.size())) for (const auto attribute_idx : ext::range(0, attributes.size()))
{ {
const auto & attribute_column = *block.getByPosition(attribute_idx + 1).column; const auto & attribute_column = *column_ptrs[attribute_idx];
auto & attribute = attributes[attribute_idx]; auto & attribute = attributes[attribute_idx];
setAttributeValue(attribute, cell_idx, attribute_column[i]); setAttributeValue(attribute, cell_idx, attribute_column[i]);
@ -486,7 +505,7 @@ private:
} }
} }
std::size_t getAttributeIndex(const std::string & attribute_name) const attribute_t & getAttribute(const std::string & attribute_name) const
{ {
const auto it = attribute_index_by_name.find(attribute_name); const auto it = attribute_index_by_name.find(attribute_name);
if (it == std::end(attribute_index_by_name)) if (it == std::end(attribute_index_by_name))
@ -495,7 +514,7 @@ private:
ErrorCodes::BAD_ARGUMENTS ErrorCodes::BAD_ARGUMENTS
}; };
return it->second; return attributes[it->second];
} }
static std::size_t round_up_to_power_of_two(std::size_t n) static std::size_t round_up_to_power_of_two(std::size_t n)
@ -529,7 +548,7 @@ private:
std::map<std::string, std::size_t> attribute_index_by_name; std::map<std::string, std::size_t> attribute_index_by_name;
mutable std::vector<attribute_t> attributes; mutable std::vector<attribute_t> attributes;
mutable std::vector<cell_metadata_t> cells; mutable std::vector<cell_metadata_t> cells;
const attribute_t * hierarchical_attribute = nullptr; attribute_t * hierarchical_attribute = nullptr;
mutable std::mt19937_64 rnd_engine{getSeed()}; mutable std::mt19937_64 rnd_engine{getSeed()};
}; };

View File

@ -6,6 +6,7 @@
#include <DB/Columns/ColumnString.h> #include <DB/Columns/ColumnString.h>
#include <statdaemons/ext/range.hpp> #include <statdaemons/ext/range.hpp>
#include <vector> #include <vector>
#include <tuple>
namespace DB namespace DB
{ {
@ -46,42 +47,29 @@ public:
id_t toParent(const id_t id) const override id_t toParent(const id_t id) const override
{ {
const auto attr = hierarchical_attribute; const auto attr = hierarchical_attribute;
const auto & array = *std::get<std::unique_ptr<PODArray<UInt64>>>(attr->arrays);
switch (hierarchical_attribute->type) return id < array.size() ? array[id] : std::get<UInt64>(attr->null_values);
{
case AttributeType::uint8: return id < attr->uint8_array->size() ? (*attr->uint8_array)[id] : attr->uint8_null_value;
case AttributeType::uint16: return id < attr->uint16_array->size() ? (*attr->uint16_array)[id] : attr->uint16_null_value;
case AttributeType::uint32: return id < attr->uint32_array->size() ? (*attr->uint32_array)[id] : attr->uint32_null_value;
case AttributeType::uint64: return id < attr->uint64_array->size() ? (*attr->uint64_array)[id] : attr->uint64_null_value;
case AttributeType::int8: return id < attr->int8_array->size() ? (*attr->int8_array)[id] : attr->int8_null_value;
case AttributeType::int16: return id < attr->int16_array->size() ? (*attr->int16_array)[id] : attr->int16_null_value;
case AttributeType::int32: return id < attr->int32_array->size() ? (*attr->int32_array)[id] : attr->int32_null_value;
case AttributeType::int64: return id < attr->int64_array->size() ? (*attr->int64_array)[id] : attr->int64_null_value;
case AttributeType::float32:
case AttributeType::float64:
case AttributeType::string:
break;
} }
throw Exception{ void toParent(const PODArray<id_t> & ids, PODArray<id_t> & out) const override
"Hierarchical attribute has non-integer type " + toString(hierarchical_attribute->type), {
ErrorCodes::TYPE_MISMATCH getItems<UInt64>(*hierarchical_attribute, ids, out);
};
} }
#define DECLARE_INDIVIDUAL_GETTER(TYPE, LC_TYPE) \ #define DECLARE_INDIVIDUAL_GETTER(TYPE, LC_TYPE) \
TYPE get##TYPE(const std::string & attribute_name, const id_t id) const override\ TYPE get##TYPE(const std::string & attribute_name, const id_t id) const override\
{\ {\
const auto idx = getAttributeIndex(attribute_name);\ const auto & attribute = getAttribute(attribute_name);\
const auto & attribute = attributes[idx];\
if (attribute.type != AttributeType::LC_TYPE)\ if (attribute.type != AttributeType::LC_TYPE)\
throw Exception{\ throw Exception{\
"Type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type),\ "Type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type),\
ErrorCodes::TYPE_MISMATCH\ ErrorCodes::TYPE_MISMATCH\
};\ };\
if (id < attribute.LC_TYPE##_array->size())\ \
return (*attribute.LC_TYPE##_array)[id];\ const auto & array = *std::get<std::unique_ptr<PODArray<TYPE>>>(attribute.arrays);\
return attribute.LC_TYPE##_null_value;\ \
return id < array.size() ? array[id] : std::get<TYPE>(attribute.null_values);\
} }
DECLARE_INDIVIDUAL_GETTER(UInt8, uint8) DECLARE_INDIVIDUAL_GETTER(UInt8, uint8)
DECLARE_INDIVIDUAL_GETTER(UInt16, uint16) DECLARE_INDIVIDUAL_GETTER(UInt16, uint16)
@ -93,28 +81,32 @@ public:
DECLARE_INDIVIDUAL_GETTER(Int64, int64) DECLARE_INDIVIDUAL_GETTER(Int64, int64)
DECLARE_INDIVIDUAL_GETTER(Float32, float32) DECLARE_INDIVIDUAL_GETTER(Float32, float32)
DECLARE_INDIVIDUAL_GETTER(Float64, float64) DECLARE_INDIVIDUAL_GETTER(Float64, float64)
DECLARE_INDIVIDUAL_GETTER(String, string)
#undef DECLARE_INDIVIDUAL_GETTER #undef DECLARE_INDIVIDUAL_GETTER
/// Returns the string attribute `attribute_name` for a single `id`.
/// Throws TYPE_MISMATCH when the named attribute is not of string type.
/// Ids that lie beyond the end of the attribute's array yield the attribute's null value.
String getString(const std::string & attribute_name, const id_t id) const override
{
    const auto & attr = getAttribute(attribute_name);
    if (attr.type != AttributeType::string)
        throw Exception{
            "Type mismatch: attribute " + attribute_name + " has type " + toString(attr.type),
            ErrorCodes::TYPE_MISMATCH
        };

    const auto & values = *std::get<std::unique_ptr<PODArray<StringRef>>>(attr.arrays);

    /// nothing stored for this id, fall back to the configured null value
    if (id >= values.size())
        return std::get<String>(attr.null_values);

    return String{values[id]};
}
#define DECLARE_MULTIPLE_GETTER(TYPE, LC_TYPE)\ #define DECLARE_MULTIPLE_GETTER(TYPE, LC_TYPE)\
void get##TYPE(const std::string & attribute_name, const PODArray<id_t> & ids, PODArray<TYPE> & out) const override\ void get##TYPE(const std::string & attribute_name, const PODArray<id_t> & ids, PODArray<TYPE> & out) const override\
{\ {\
const auto idx = getAttributeIndex(attribute_name);\ const auto & attribute = getAttribute(attribute_name);\
const auto & attribute = attributes[idx];\
if (attribute.type != AttributeType::LC_TYPE)\ if (attribute.type != AttributeType::LC_TYPE)\
throw Exception{\ throw Exception{\
"Type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type),\ "Type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type),\
ErrorCodes::TYPE_MISMATCH\ ErrorCodes::TYPE_MISMATCH\
};\ };\
\ \
const auto & attr = *attribute.LC_TYPE##_array;\ getItems<TYPE>(attribute, ids, out);\
const auto null_value = attribute.LC_TYPE##_null_value;\
\
for (const auto i : ext::range(0, ids.size()))\
{\
const auto id = ids[i];\
out[i] = id < attr.size() ? attr[id] : null_value;\
}\
} }
DECLARE_MULTIPLE_GETTER(UInt8, uint8) DECLARE_MULTIPLE_GETTER(UInt8, uint8)
DECLARE_MULTIPLE_GETTER(UInt16, uint16) DECLARE_MULTIPLE_GETTER(UInt16, uint16)
@ -129,16 +121,15 @@ public:
#undef DECLARE_MULTIPLE_GETTER #undef DECLARE_MULTIPLE_GETTER
void getString(const std::string & attribute_name, const PODArray<id_t> & ids, ColumnString * out) const override void getString(const std::string & attribute_name, const PODArray<id_t> & ids, ColumnString * out) const override
{ {
const auto idx = getAttributeIndex(attribute_name); const auto & attribute = getAttribute(attribute_name);
const auto & attribute = attributes[idx];
if (attribute.type != AttributeType::string) if (attribute.type != AttributeType::string)
throw Exception{ throw Exception{
"Type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), "Type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type),
ErrorCodes::TYPE_MISMATCH ErrorCodes::TYPE_MISMATCH
}; };
const auto & attr = *attribute.string_array; const auto & attr = *std::get<std::unique_ptr<PODArray<StringRef>>>(attribute.arrays);
const auto null_value = attribute.string_null_value; const auto & null_value = std::get<String>(attribute.null_values);
for (const auto i : ext::range(0, ids.size())) for (const auto i : ext::range(0, ids.size()))
{ {
@ -149,32 +140,25 @@ public:
} }
private: private:
struct attribute_t struct attribute_t final
{ {
AttributeType type; AttributeType type;
UInt8 uint8_null_value; std::tuple<UInt8, UInt16, UInt32, UInt64,
UInt16 uint16_null_value; Int8, Int16, Int32, Int64,
UInt32 uint32_null_value; Float32, Float64,
UInt64 uint64_null_value; String> null_values;
Int8 int8_null_value; std::tuple<std::unique_ptr<PODArray<UInt8>>,
Int16 int16_null_value; std::unique_ptr<PODArray<UInt16>>,
Int32 int32_null_value; std::unique_ptr<PODArray<UInt32>>,
Int64 int64_null_value; std::unique_ptr<PODArray<UInt64>>,
Float32 float32_null_value; std::unique_ptr<PODArray<Int8>>,
Float64 float64_null_value; std::unique_ptr<PODArray<Int16>>,
String string_null_value; std::unique_ptr<PODArray<Int32>>,
std::unique_ptr<PODArray<UInt8>> uint8_array; std::unique_ptr<PODArray<Int64>>,
std::unique_ptr<PODArray<UInt16>> uint16_array; std::unique_ptr<PODArray<Float32>>,
std::unique_ptr<PODArray<UInt32>> uint32_array; std::unique_ptr<PODArray<Float64>>,
std::unique_ptr<PODArray<UInt64>> uint64_array; std::unique_ptr<PODArray<StringRef>>> arrays;
std::unique_ptr<PODArray<Int8>> int8_array;
std::unique_ptr<PODArray<Int16>> int16_array;
std::unique_ptr<PODArray<Int32>> int32_array;
std::unique_ptr<PODArray<Int64>> int64_array;
std::unique_ptr<PODArray<Float32>> float32_array;
std::unique_ptr<PODArray<Float64>> float64_array;
std::unique_ptr<Arena> string_arena; std::unique_ptr<Arena> string_arena;
std::unique_ptr<PODArray<StringRef>> string_array;
}; };
void createAttributes() void createAttributes()
@ -188,7 +172,15 @@ private:
attribute.null_value)); attribute.null_value));
if (attribute.hierarchical) if (attribute.hierarchical)
{
hierarchical_attribute = &attributes.back(); hierarchical_attribute = &attributes.back();
if (hierarchical_attribute->type != AttributeType::uint64)
throw Exception{
"Hierarchical attribute must be UInt64.",
ErrorCodes::TYPE_MISMATCH
};
}
} }
} }
@ -214,73 +206,65 @@ private:
stream->readSuffix(); stream->readSuffix();
} }
/// Initializes the T-typed slots of `attribute`: parses `null_value` into the matching entry of
/// null_values, then allocates the matching array with initial_array_size elements,
/// each set to that parsed null value.
template <typename T>
void createAttributeImpl(attribute_t & attribute, const std::string & null_value)
{
    /// store the parsed default first, the array below is filled from the stored copy
    std::get<T>(attribute.null_values) = DB::parse<T>(null_value);
    const T & stored_default = std::get<T>(attribute.null_values);

    auto & storage = std::get<std::unique_ptr<PODArray<T>>>(attribute.arrays);
    storage = std::make_unique<PODArray<T>>(initial_array_size, stored_default);
}
attribute_t createAttributeWithType(const AttributeType type, const std::string & null_value) attribute_t createAttributeWithType(const AttributeType type, const std::string & null_value)
{ {
attribute_t attr{type}; attribute_t attr{type};
switch (type) switch (type)
{ {
case AttributeType::uint8: case AttributeType::uint8: createAttributeImpl<UInt8>(attr, null_value); break;
attr.uint8_null_value = DB::parse<UInt8>(null_value); case AttributeType::uint16: createAttributeImpl<UInt16>(attr, null_value); break;
attr.uint8_array.reset(new PODArray<UInt8>); case AttributeType::uint32: createAttributeImpl<UInt32>(attr, null_value); break;
attr.uint8_array->resize_fill(initial_array_size, attr.uint8_null_value); case AttributeType::uint64: createAttributeImpl<UInt64>(attr, null_value); break;
break; case AttributeType::int8: createAttributeImpl<Int8>(attr, null_value); break;
case AttributeType::uint16: case AttributeType::int16: createAttributeImpl<Int16>(attr, null_value); break;
attr.uint16_null_value = DB::parse<UInt16>(null_value); case AttributeType::int32: createAttributeImpl<Int32>(attr, null_value); break;
attr.uint16_array.reset(new PODArray<UInt16>); case AttributeType::int64: createAttributeImpl<Int64>(attr, null_value); break;
attr.uint16_array->resize_fill(initial_array_size, attr.uint16_null_value); case AttributeType::float32: createAttributeImpl<Float32>(attr, null_value); break;
break; case AttributeType::float64: createAttributeImpl<Float64>(attr, null_value); break;
case AttributeType::uint32:
attr.uint32_null_value = DB::parse<UInt32>(null_value);
attr.uint32_array.reset(new PODArray<UInt32>);
attr.uint32_array->resize_fill(initial_array_size, attr.uint32_null_value);
break;
case AttributeType::uint64:
attr.uint64_null_value = DB::parse<UInt64>(null_value);
attr.uint64_array.reset(new PODArray<UInt64>);
attr.uint64_array->resize_fill(initial_array_size, attr.uint64_null_value);
break;
case AttributeType::int8:
attr.int8_null_value = DB::parse<Int8>(null_value);
attr.int8_array.reset(new PODArray<Int8>);
attr.int8_array->resize_fill(initial_array_size, attr.int8_null_value);
break;
case AttributeType::int16:
attr.int16_null_value = DB::parse<Int16>(null_value);
attr.int16_array.reset(new PODArray<Int16>);
attr.int16_array->resize_fill(initial_array_size, attr.int16_null_value);
break;
case AttributeType::int32:
attr.int32_null_value = DB::parse<Int32>(null_value);
attr.int32_array.reset(new PODArray<Int32>);
attr.int32_array->resize_fill(initial_array_size, attr.int32_null_value);
break;
case AttributeType::int64:
attr.int64_null_value = DB::parse<Int64>(null_value);
attr.int64_array.reset(new PODArray<Int64>);
attr.int64_array->resize_fill(initial_array_size, attr.int64_null_value);
break;
case AttributeType::float32:
attr.float32_null_value = DB::parse<Float32>(null_value);
attr.float32_array.reset(new PODArray<Float32>);
attr.float32_array->resize_fill(initial_array_size, attr.float32_null_value);
break;
case AttributeType::float64:
attr.float64_null_value = DB::parse<Float64>(null_value);
attr.float64_array.reset(new PODArray<Float64>);
attr.float64_array->resize_fill(initial_array_size, attr.float64_null_value);
break;
case AttributeType::string: case AttributeType::string:
attr.string_null_value = null_value; {
attr.string_arena.reset(new Arena); const auto & null_value_ref = std::get<String>(attr.null_values) = DB::parse<String>(null_value);
attr.string_array.reset(new PODArray<StringRef>); std::get<std::unique_ptr<PODArray<StringRef>>>(attr.arrays) =
attr.string_array->resize_fill(initial_array_size, attr.string_null_value); std::make_unique<PODArray<StringRef>>(initial_array_size, null_value_ref);
attr.string_arena = std::make_unique<Arena>();
break; break;
} }
};
return attr; return attr;
} }
/// Bulk lookup for a T-typed attribute: for each position in `ids`, writes the value stored for
/// that id into the same position of `out`; ids past the end of the attribute's array get the
/// attribute's null value instead.
/// `out` is only indexed, never resized, so it has to hold at least ids.size() elements already.
template <typename T>
void getItems(const attribute_t & attribute, const PODArray<id_t> & ids, PODArray<T> & out) const
{
    const auto & values = *std::get<std::unique_ptr<PODArray<T>>>(attribute.arrays);
    const auto fallback = std::get<T>(attribute.null_values);
    const auto count = ids.size();

    for (std::size_t pos = 0; pos < count; ++pos)
    {
        const auto key = ids[pos];

        if (key < values.size())
            out[pos] = values[key];
        else
            out[pos] = fallback;
    }
}
/// Stores `value` for `id` in the T-typed array of `attribute`.
/// When `id` lies past the current end, the array is first grown to cover indices 0..id,
/// with every newly created element set to the attribute's null value.
template <typename T>
void setAttributeValueImpl(attribute_t & attribute, const id_t id, const T value)
{
    auto & array = *std::get<std::unique_ptr<PODArray<T>>>(attribute.arrays);

    /// index `id` is valid only once the array holds at least id + 1 elements;
    /// growing to exactly `id` would leave the write below one past the end
    if (id >= array.size())
        array.resize_fill(id + 1, std::get<T>(attribute.null_values));

    array[id] = value;
}
void setAttributeValue(attribute_t & attribute, const id_t id, const Field & value) void setAttributeValue(attribute_t & attribute, const id_t id, const Field & value)
{ {
if (id >= max_array_size) if (id >= max_array_size)
@ -293,87 +277,68 @@ private:
{ {
case AttributeType::uint8: case AttributeType::uint8:
{ {
if (id >= attribute.uint8_array->size()) setAttributeValueImpl<UInt8>(attribute, id, value.get<UInt64>());
attribute.uint8_array->resize_fill(id, attribute.uint8_null_value);
(*attribute.uint8_array)[id] = value.get<UInt64>();
break; break;
} }
case AttributeType::uint16: case AttributeType::uint16:
{ {
if (id >= attribute.uint16_array->size()) setAttributeValueImpl<UInt16>(attribute, id, value.get<UInt64>());
attribute.uint16_array->resize_fill(id, attribute.uint16_null_value);
(*attribute.uint16_array)[id] = value.get<UInt64>();
break; break;
} }
case AttributeType::uint32: case AttributeType::uint32:
{ {
if (id >= attribute.uint32_array->size()) setAttributeValueImpl<UInt32>(attribute, id, value.get<UInt64>());
attribute.uint32_array->resize_fill(id, attribute.uint32_null_value);
(*attribute.uint32_array)[id] = value.get<UInt64>();
break; break;
} }
case AttributeType::uint64: case AttributeType::uint64:
{ {
if (id >= attribute.uint64_array->size()) setAttributeValueImpl<UInt64>(attribute, id, value.get<UInt64>());
attribute.uint64_array->resize_fill(id, attribute.uint64_null_value);
(*attribute.uint64_array)[id] = value.get<UInt64>();
break; break;
} }
case AttributeType::int8: case AttributeType::int8:
{ {
if (id >= attribute.int8_array->size()) setAttributeValueImpl<Int8>(attribute, id, value.get<Int64>());
attribute.int8_array->resize_fill(id, attribute.int8_null_value);
(*attribute.int8_array)[id] = value.get<Int64>();
break; break;
} }
case AttributeType::int16: case AttributeType::int16:
{ {
if (id >= attribute.int16_array->size()) setAttributeValueImpl<Int16>(attribute, id, value.get<Int64>());
attribute.int16_array->resize_fill(id, attribute.int16_null_value);
(*attribute.int16_array)[id] = value.get<Int64>();
break; break;
} }
case AttributeType::int32: case AttributeType::int32:
{ {
if (id >= attribute.int32_array->size()) setAttributeValueImpl<Int32>(attribute, id, value.get<Int64>());
attribute.int32_array->resize_fill(id, attribute.int32_null_value);
(*attribute.int32_array)[id] = value.get<Int64>();
break; break;
} }
case AttributeType::int64: case AttributeType::int64:
{ {
if (id >= attribute.int64_array->size()) setAttributeValueImpl<Int64>(attribute, id, value.get<Int64>());
attribute.int64_array->resize_fill(id, attribute.int64_null_value);
(*attribute.int64_array)[id] = value.get<Int64>();
break; break;
} }
case AttributeType::float32: case AttributeType::float32:
{ {
if (id >= attribute.float32_array->size()) setAttributeValueImpl<Float32>(attribute, id, value.get<Float64>());
attribute.float32_array->resize_fill(id, attribute.float32_null_value);
(*attribute.float32_array)[id] = value.get<Float64>();
break; break;
} }
case AttributeType::float64: case AttributeType::float64:
{ {
if (id >= attribute.float64_array->size()) setAttributeValueImpl<Float64>(attribute, id, value.get<Float64>());
attribute.float64_array->resize_fill(id, attribute.float64_null_value);
(*attribute.float64_array)[id] = value.get<Float64>();
break; break;
} }
case AttributeType::string: case AttributeType::string:
{ {
if (id >= attribute.string_array->size()) auto & array = *std::get<std::unique_ptr<PODArray<StringRef>>>(attribute.arrays);
attribute.string_array->resize_fill(id, attribute.string_null_value); if (id >= array.size())
array.resize_fill(id, std::get<String>(attribute.null_values));
const auto & string = value.get<String>(); const auto & string = value.get<String>();
const auto string_in_arena = attribute.string_arena->insert(string.data(), string.size()); const auto string_in_arena = attribute.string_arena->insert(string.data(), string.size());
(*attribute.string_array)[id] = StringRef{string_in_arena, string.size()}; array[id] = StringRef{string_in_arena, string.size()};
break; break;
} }
} }
} }
std::size_t getAttributeIndex(const std::string & attribute_name) const const attribute_t & getAttribute(const std::string & attribute_name) const
{ {
const auto it = attribute_index_by_name.find(attribute_name); const auto it = attribute_index_by_name.find(attribute_name);
if (it == std::end(attribute_index_by_name)) if (it == std::end(attribute_index_by_name))
@ -382,7 +347,7 @@ private:
ErrorCodes::BAD_ARGUMENTS ErrorCodes::BAD_ARGUMENTS
}; };
return it->second; return attributes[it->second];
} }
const std::string name; const std::string name;

View File

@ -45,65 +45,26 @@ public:
{ {
const auto attr = hierarchical_attribute; const auto attr = hierarchical_attribute;
switch (hierarchical_attribute->type)
{
case AttributeType::uint8:
{
const auto it = attr->uint8_map->find(id);
return it != attr->uint8_map->end() ? it->second : attr->uint8_null_value;
}
case AttributeType::uint16:
{
const auto it = attr->uint16_map->find(id);
return it != attr->uint16_map->end() ? it->second : attr->uint16_null_value;
}
case AttributeType::uint32:
{
const auto it = attr->uint32_map->find(id);
return it != attr->uint32_map->end() ? it->second : attr->uint32_null_value;
}
case AttributeType::uint64:
{
const auto it = attr->uint64_map->find(id); const auto it = attr->uint64_map->find(id);
return it != attr->uint64_map->end() ? it->second : attr->uint64_null_value; return it != attr->uint64_map->end() ? it->second : attr->uint64_null_value;
} }
case AttributeType::int8:
{
const auto it = attr->int8_map->find(id);
return it != attr->int8_map->end() ? it->second : attr->int8_null_value;
}
case AttributeType::int16:
{
const auto it = attr->int16_map->find(id);
return it != attr->int16_map->end() ? it->second : attr->int16_null_value;
}
case AttributeType::int32:
{
const auto it = attr->int32_map->find(id);
return it != attr->int32_map->end() ? it->second : attr->int32_null_value;
}
case AttributeType::int64:
{
const auto it = attr->int64_map->find(id);
return it != attr->int64_map->end() ? it->second : attr->int64_null_value;
}
case AttributeType::float32:
case AttributeType::float64:
case AttributeType::string:
break;
};
throw Exception{ void toParent(const PODArray<id_t> & ids, PODArray<id_t> & out) const override
"Hierarchical attribute has non-integer type " + toString(hierarchical_attribute->type), {
ErrorCodes::TYPE_MISMATCH const auto & attr = *hierarchical_attribute->uint64_map;
}; const auto null_value = hierarchical_attribute->uint64_null_value;
for (const auto i : ext::range(0, ids.size()))
{
const auto it = attr.find(ids[i]);
out[i] = it != attr.end() ? it->second : null_value;
}
} }
#define DECLARE_INDIVIDUAL_GETTER(TYPE, LC_TYPE) \ #define DECLARE_INDIVIDUAL_GETTER(TYPE, LC_TYPE) \
TYPE get##TYPE(const std::string & attribute_name, const id_t id) const override\ TYPE get##TYPE(const std::string & attribute_name, const id_t id) const override\
{\ {\
const auto idx = getAttributeIndex(attribute_name);\ const auto & attribute = getAttribute(attribute_name);\
const auto & attribute = attributes[idx];\
if (attribute.type != AttributeType::LC_TYPE)\ if (attribute.type != AttributeType::LC_TYPE)\
throw Exception{\ throw Exception{\
"Type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type),\ "Type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type),\
@ -132,8 +93,7 @@ public:
#define DECLARE_MULTIPLE_GETTER(TYPE, LC_TYPE)\ #define DECLARE_MULTIPLE_GETTER(TYPE, LC_TYPE)\
void get##TYPE(const std::string & attribute_name, const PODArray<id_t> & ids, PODArray<TYPE> & out) const override\ void get##TYPE(const std::string & attribute_name, const PODArray<id_t> & ids, PODArray<TYPE> & out) const override\
{\ {\
const auto idx = getAttributeIndex(attribute_name);\ const auto & attribute = getAttribute(attribute_name);\
const auto & attribute = attributes[idx];\
if (attribute.type != AttributeType::LC_TYPE)\ if (attribute.type != AttributeType::LC_TYPE)\
throw Exception{\ throw Exception{\
"Type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type),\ "Type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type),\
@ -162,8 +122,7 @@ public:
#undef DECLARE_MULTIPLE_GETTER #undef DECLARE_MULTIPLE_GETTER
void getString(const std::string & attribute_name, const PODArray<id_t> & ids, ColumnString * out) const override void getString(const std::string & attribute_name, const PODArray<id_t> & ids, ColumnString * out) const override
{ {
const auto idx = getAttributeIndex(attribute_name); const auto & attribute = getAttribute(attribute_name);
const auto & attribute = attributes[idx];
if (attribute.type != AttributeType::string) if (attribute.type != AttributeType::string)
throw Exception{ throw Exception{
"Type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), "Type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type),
@ -221,7 +180,15 @@ private:
attribute.null_value)); attribute.null_value));
if (attribute.hierarchical) if (attribute.hierarchical)
{
hierarchical_attribute = &attributes.back(); hierarchical_attribute = &attributes.back();
if (hierarchical_attribute->type != AttributeType::uint64)
throw Exception{
"Hierarchical attribute must be UInt64.",
ErrorCodes::TYPE_MISMATCH
};
}
} }
} }
@ -367,7 +334,7 @@ private:
}; };
} }
std::size_t getAttributeIndex(const std::string & attribute_name) const const attribute_t & getAttribute(const std::string & attribute_name) const
{ {
const auto it = attribute_index_by_name.find(attribute_name); const auto it = attribute_index_by_name.find(attribute_name);
if (it == std::end(attribute_index_by_name)) if (it == std::end(attribute_index_by_name))
@ -376,7 +343,7 @@ private:
ErrorCodes::BAD_ARGUMENTS ErrorCodes::BAD_ARGUMENTS
}; };
return it->second; return attributes[it->second];
} }
const std::string name; const std::string name;

View File

@ -38,6 +38,7 @@ public:
/// do not call unless you ensure that hasHierarchy() returns true /// do not call unless you ensure that hasHierarchy() returns true
virtual id_t toParent(id_t id) const = 0; virtual id_t toParent(id_t id) const = 0;
virtual void toParent(const PODArray<id_t> & ids, PODArray<id_t> & out) const = 0;
bool in(id_t child_id, const id_t ancestor_id) const bool in(id_t child_id, const id_t ancestor_id) const
{ {