From b793248b8f9932a7273a41edb5ad156e46db8cc3 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 25 Feb 2015 22:45:32 +0300 Subject: [PATCH 1/4] dbms: fixed error [#METR-2944]. --- dbms/include/DB/Interpreters/AggregationCommon.h | 4 +++- .../00128_group_by_number_and_fixed_string.reference | 10 ++++++++++ .../00128_group_by_number_and_fixed_string.sql | 1 + 3 files changed, 14 insertions(+), 1 deletion(-) create mode 100644 dbms/tests/queries/0_stateless/00128_group_by_number_and_fixed_string.reference create mode 100644 dbms/tests/queries/0_stateless/00128_group_by_number_and_fixed_string.sql diff --git a/dbms/include/DB/Interpreters/AggregationCommon.h b/dbms/include/DB/Interpreters/AggregationCommon.h index 7e2603cc768..c88a4897391 100644 --- a/dbms/include/DB/Interpreters/AggregationCommon.h +++ b/dbms/include/DB/Interpreters/AggregationCommon.h @@ -10,6 +10,7 @@ #include #include #include +#include template <> @@ -56,7 +57,8 @@ static inline T ALWAYS_INLINE packFixed( offset += 8; break; default: - __builtin_unreachable(); + memcpy(bytes + offset, &static_cast(key_columns[j])->getChars()[i * key_sizes[j]], key_sizes[j]); + offset += key_sizes[j]; } } diff --git a/dbms/tests/queries/0_stateless/00128_group_by_number_and_fixed_string.reference b/dbms/tests/queries/0_stateless/00128_group_by_number_and_fixed_string.reference new file mode 100644 index 00000000000..f5ba86c1d71 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00128_group_by_number_and_fixed_string.reference @@ -0,0 +1,10 @@ +99999 +99998 +99997 +99996 +99995 +99994 +99993 +99992 +99991 +99990 diff --git a/dbms/tests/queries/0_stateless/00128_group_by_number_and_fixed_string.sql b/dbms/tests/queries/0_stateless/00128_group_by_number_and_fixed_string.sql new file mode 100644 index 00000000000..25d25c2b751 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00128_group_by_number_and_fixed_string.sql @@ -0,0 +1 @@ +SELECT n, k FROM (SELECT number AS n, toFixedString(materialize(' '), 3) AS k FROM system.numbers LIMIT 100000) GROUP BY n, k ORDER BY n DESC, k LIMIT 10; From 70817eb5a68d39802a2d93337acb2147c87555de Mon Sep 17 00:00:00 2001 From: Andrey Mironov Date: Thu, 26 Feb 2015 17:51:28 +0300 Subject: [PATCH 2/4] dbms: move semantics for PODArray --- dbms/include/DB/Common/PODArray.h | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/dbms/include/DB/Common/PODArray.h b/dbms/include/DB/Common/PODArray.h index 43b93568a97..0b4e0550a30 100644 --- a/dbms/include/DB/Common/PODArray.h +++ b/dbms/include/DB/Common/PODArray.h @@ -177,6 +177,17 @@ public: PODArray(const_iterator from_begin, const_iterator from_end) : use_libc_realloc(false) { alloc(from_end - from_begin); insert(from_begin, from_end); } ~PODArray() { dealloc(); } + PODArray(PODArray && other) { *this = std::move(other); } + PODArray & operator=(PODArray && other) + { + std::swap(c_start, other.c_start); + std::swap(c_end, other.c_end); + std::swap(c_end_of_storage, other.c_end_of_storage); + std::swap(use_libc_realloc, other.use_libc_realloc); + + return *this; + } + size_t size() const { return t_end() - t_start(); } bool empty() const { return t_end() == t_start(); } From cc6ac6b930dde0ac36ab76fe77f43cf004468725 Mon Sep 17 00:00:00 2001 From: Andrey Mironov Date: Thu, 26 Feb 2015 17:52:03 +0300 Subject: [PATCH 3/4] dbms: implicit conversion operator for StringRef to std::string --- dbms/include/DB/Core/StringRef.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dbms/include/DB/Core/StringRef.h b/dbms/include/DB/Core/StringRef.h index 4ae264e4f3f..9836f0a6c32 100644 --- a/dbms/include/DB/Core/StringRef.h +++ b/dbms/include/DB/Core/StringRef.h @@ -24,6 +24,8 @@ struct StringRef StringRef() = default; std::string toString() const { return std::string(data, size); } + + operator std::string() const { return toString(); } }; typedef std::vector StringRefs; From 20eb5013a463f83da1f5c3d48234dafc57598b12 Mon Sep 17 00:00:00 2001 From: Andrey Mironov Date: Thu, 26 Feb 2015 17:53:33 +0300 Subject: [PATCH 4/4] dbms: cache dictionary revamp [#METR-13298] --- .../include/DB/Dictionaries/CacheDictionary.h | 488 +++++++++++------- .../Dictionaries/ClickHouseDictionarySource.h | 30 +- .../DB/Dictionaries/FileDictionarySource.h | 8 - dbms/include/DB/Dictionaries/FlatDictionary.h | 32 +- .../DB/Dictionaries/HashedDictionary.h | 32 +- dbms/include/DB/Dictionaries/IDictionary.h | 2 +- .../DB/Dictionaries/IDictionarySource.h | 3 - .../DB/Dictionaries/MySQLDictionarySource.h | 8 - .../DB/Functions/FunctionsDictionaries.h | 4 +- dbms/src/Interpreters/DictionaryFactory.cpp | 11 + 10 files changed, 359 insertions(+), 259 deletions(-) diff --git a/dbms/include/DB/Dictionaries/CacheDictionary.h b/dbms/include/DB/Dictionaries/CacheDictionary.h index 62c8d6bb2ed..98912027cdc 100644 --- a/dbms/include/DB/Dictionaries/CacheDictionary.h +++ b/dbms/include/DB/Dictionaries/CacheDictionary.h @@ -3,18 +3,19 @@ #include #include #include +#include #include #include +#include #include #include #include #include +#include namespace DB { -constexpr std::chrono::milliseconds spinlock_wait_time{10}; - class CacheDictionary final : public IDictionary { public: @@ -24,7 +25,7 @@ public: : name{name}, dict_struct(dict_struct), source_ptr{std::move(source_ptr)}, dict_lifetime(dict_lifetime), size{round_up_to_power_of_two(size)}, - cells(this->size, cell{dict_struct.attributes.size()}) + cells{this->size} { if (!this->source_ptr->supportsSelectiveLoad()) throw Exception{ @@ -55,8 +56,8 @@ public: id_t toParent(const id_t id) const override { return 0; } -#define DECLARE_INDIVIDUAL_GETTER(TYPE, NAME, LC_TYPE) \ - TYPE get##NAME(const std::string & attribute_name, const id_t id) const override\ +#define DECLARE_INDIVIDUAL_GETTER(TYPE, LC_TYPE) \ + TYPE get##TYPE(const std::string & attribute_name, const id_t id) const override\ {\ const auto idx = getAttributeIndex(attribute_name);\ const auto & attribute = attributes[idx];\ @@ -66,20 +67,38 @@ public: ErrorCodes::TYPE_MISMATCH\ };\ \ - return getItem(getAttributeIndex(attribute_name), id);\ + PODArray ids{1, id};\ + PODArray out{1};\ + getItems(idx, ids, out);\ + return out.front();\ } - DECLARE_INDIVIDUAL_GETTER(UInt8, UInt8, uint8) - DECLARE_INDIVIDUAL_GETTER(UInt16, UInt16, uint16) - DECLARE_INDIVIDUAL_GETTER(UInt32, UInt32, uint32) - DECLARE_INDIVIDUAL_GETTER(UInt64, UInt64, uint64) - DECLARE_INDIVIDUAL_GETTER(Int8, Int8, int8) - DECLARE_INDIVIDUAL_GETTER(Int16, Int16, int16) - DECLARE_INDIVIDUAL_GETTER(Int32, Int32, int32) - DECLARE_INDIVIDUAL_GETTER(Int64, Int64, int64) - DECLARE_INDIVIDUAL_GETTER(Float32, Float32, float32) - DECLARE_INDIVIDUAL_GETTER(Float64, Float64, float64) - DECLARE_INDIVIDUAL_GETTER(StringRef, String, string) + DECLARE_INDIVIDUAL_GETTER(UInt8, uint8) + DECLARE_INDIVIDUAL_GETTER(UInt16, uint16) + DECLARE_INDIVIDUAL_GETTER(UInt32, uint32) + DECLARE_INDIVIDUAL_GETTER(UInt64, uint64) + DECLARE_INDIVIDUAL_GETTER(Int8, int8) + DECLARE_INDIVIDUAL_GETTER(Int16, int16) + DECLARE_INDIVIDUAL_GETTER(Int32, int32) + DECLARE_INDIVIDUAL_GETTER(Int64, int64) + DECLARE_INDIVIDUAL_GETTER(Float32, float32) + DECLARE_INDIVIDUAL_GETTER(Float64, float64) #undef DECLARE_INDIVIDUAL_GETTER + String getString(const std::string & attribute_name, const id_t id) const override + { + const auto idx = getAttributeIndex(attribute_name); + const auto & attribute = attributes[idx]; + if (attribute.type != AttributeType::string) + throw Exception{ + "Type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), + ErrorCodes::TYPE_MISMATCH + }; + + PODArray ids{1, id}; + ColumnString out; + getItems(idx, ids, &out); + + return out.getDataAt(0); + } #define DECLARE_MULTIPLE_GETTER(TYPE, LC_TYPE)\ void get##TYPE(const std::string & attribute_name, const PODArray & ids, PODArray & out) const override\ @@ -92,8 +111,7 @@ public: ErrorCodes::TYPE_MISMATCH\ };\ \ - for (const auto i : ext::range(0, ids.size()))\ - out[i] = getItem(idx, ids[i]);\ + getItems(idx, ids, out);\ } DECLARE_MULTIPLE_GETTER(UInt8, uint8) DECLARE_MULTIPLE_GETTER(UInt16, uint16) @@ -116,28 +134,34 @@ public: ErrorCodes::TYPE_MISMATCH }; - for (const auto i : ext::range(0, ids.size())) - { - const auto string_ref = getItem(idx, ids[i]); - out->insertData(string_ref.data, string_ref.size); - } + getItems(idx, ids, out); } private: - struct attribute_t + struct cell_metadata_t final + { + std::uint64_t id; + std::chrono::system_clock::time_point expires_at; + }; + + struct attribute_t final { AttributeType type; - UInt8 uint8_null_value; - UInt16 uint16_null_value; - UInt32 uint32_null_value; - UInt64 uint64_null_value; - Int8 int8_null_value; - Int16 int16_null_value; - Int32 int32_null_value; - Int64 int64_null_value; - Float32 float32_null_value; - Float64 float64_null_value; - String string_null_value; + std::tuple null_values; + std::tuple, + std::unique_ptr, + std::unique_ptr, + std::unique_ptr, + std::unique_ptr, + std::unique_ptr, + std::unique_ptr, + std::unique_ptr, + std::unique_ptr, + std::unique_ptr, + std::unique_ptr> arrays; }; void createAttributes() @@ -148,8 +172,8 @@ private: for (const auto & attribute : dict_struct.attributes) { attribute_index_by_name.emplace(attribute.name, attributes.size()); - attributes.push_back(std::move(createAttributeWithType(getAttributeTypeByName(attribute.type), - attribute.null_value))); + attributes.push_back(createAttributeWithType(getAttributeTypeByName(attribute.type), + attribute.null_value)); if (attribute.hierarchical) hierarchical_attribute = &attributes.back(); @@ -163,166 +187,288 @@ private: switch (type) { case AttributeType::uint8: - attr.uint8_null_value = DB::parse(null_value); + std::get(attr.null_values) = DB::parse(null_value); + std::get>(attr.arrays) = std::make_unique(size); break; case AttributeType::uint16: - attr.uint16_null_value = DB::parse(null_value); + std::get(attr.null_values) = DB::parse(null_value); + std::get>(attr.arrays) = std::make_unique(size); break; case AttributeType::uint32: - attr.uint32_null_value = DB::parse(null_value); + std::get(attr.null_values) = DB::parse(null_value); + std::get>(attr.arrays) = std::make_unique(size); break; case AttributeType::uint64: - attr.uint64_null_value = DB::parse(null_value); + std::get(attr.null_values) = DB::parse(null_value); + std::get>(attr.arrays) = std::make_unique(size); break; case AttributeType::int8: - attr.int8_null_value = DB::parse(null_value); + std::get(attr.null_values) = DB::parse(null_value); + std::get>(attr.arrays) = std::make_unique(size); break; case AttributeType::int16: - attr.int16_null_value = DB::parse(null_value); + std::get(attr.null_values) = DB::parse(null_value); + std::get>(attr.arrays) = std::make_unique(size); break; case AttributeType::int32: - attr.int32_null_value = DB::parse(null_value); + std::get(attr.null_values) = DB::parse(null_value); + std::get>(attr.arrays) = std::make_unique(size); break; case AttributeType::int64: - attr.int64_null_value = DB::parse(null_value); + std::get(attr.null_values) = DB::parse(null_value); + std::get>(attr.arrays) = std::make_unique(size); break; case AttributeType::float32: - attr.float32_null_value = DB::parse(null_value); + std::get(attr.null_values) = DB::parse(null_value); + std::get>(attr.arrays) = std::make_unique(size); break; case AttributeType::float64: - attr.float64_null_value = DB::parse(null_value); + std::get(attr.null_values) = DB::parse(null_value); + std::get>(attr.arrays) = std::make_unique(size); break; case AttributeType::string: - attr.string_null_value = null_value; + std::get(attr.null_values) = null_value; + std::get>(attr.arrays) = std::make_unique(size); break; } return attr; } - union item + static bool hasTimeExpired(const std::chrono::system_clock::time_point & time_point) { - UInt8 uint8_value; - UInt16 uint16_value; - UInt32 uint32_value; - UInt64 uint64_value; - Int8 int8_value; - Int16 int16_value; - Int32 int32_value; - Int64 int64_value; - Float32 float32_value; - Float64 float64_value; - StringRef string_value; - - item() : string_value{} {} - - template inline T get() const = delete; - }; - - struct cell - { - std::atomic_flag lock{false}; - id_t id{}; - std::vector attrs; - std::chrono::system_clock::time_point expires_at{}; - - cell() = default; - cell(const std::size_t attribute_count) : attrs(attribute_count) {} - cell(const cell & other) { *this = other; } - - cell & operator=(const cell & other) - { - id = other.id; - attrs = other.attrs; - expires_at = other.expires_at; - - return *this; - } - - bool hasExpired() const { return std::chrono::system_clock::now() >= expires_at; } - }; - - template - T getItem(const std::size_t attribute_idx, const id_t id) const - { - const auto hash = intHash64(id); - const auto idx = hash % size; - auto & cell = cells[idx]; - - /// spinlock with a bit of throttling - while (cell.lock.test_and_set(std::memory_order_acquire)) - std::this_thread::sleep_for(spinlock_wait_time); - - SCOPE_EXIT( - cell.lock.clear(std::memory_order_release); - ); - - if (cell.id != id || cell.hasExpired()) - populateCellForId(cell, id); - - return cell.attrs[attribute_idx].get(); + return std::chrono::system_clock::now() >= time_point; } - void populateCellForId(cell & cell, const id_t id) const + template + void getItems(const std::size_t attribute_idx, const PODArray & ids, PODArray & out) const { - auto stream = source_ptr->loadId(id); + HashMap> outdated_ids; + auto & attribute = attributes[attribute_idx]; + auto & attribute_array = std::get>(attribute.arrays); + + { + const Poco::ScopedReadRWLock read_lock{rw_lock}; + /// fetch up-to-date values, decide which ones require update + for (const auto i : ext::range(0, ids.size())) + { + const auto id = ids[i]; + if (id == 0) + { + out[i] = std::get(attribute.null_values); + continue; + } + + const auto cell_idx = getCellIdx(id); + const auto & cell = cells[cell_idx]; + + if (cell.id != id || hasTimeExpired(cell.expires_at)) + { + out[i] = std::get(attribute.null_values); + outdated_ids[id].push_back(i); + } + else + out[i] = attribute_array[cell_idx]; + } + } + + if (outdated_ids.empty()) + return; + + /// request new values + std::vector required_ids(outdated_ids.size()); + std::transform(std::begin(outdated_ids), std::end(outdated_ids), std::begin(required_ids), + [] (auto & pair) { return pair.first; }); + + update(required_ids, [&] (const auto id, const auto cell_idx) { + const auto attribute_value = attribute_array[cell_idx]; + + /// set missing values to out + for (const auto out_idx : outdated_ids[id]) + out[out_idx] = attribute_value; + }); + } + + void getItems(const std::size_t attribute_idx, const PODArray & ids, ColumnString * out) const + { + /// save on some allocations + out->getOffsets().reserve(ids.size()); + + auto & attribute = attributes[attribute_idx]; + auto & attribute_array = std::get>(attribute.arrays); + + auto found_outdated_values = false; + + /// perform optimistic version, fallback to pessimistic if failed + { + const Poco::ScopedReadRWLock read_lock{rw_lock}; + + /// fetch up-to-date values, discard on fail + for (const auto i : ext::range(0, ids.size())) + { + const auto id = ids[i]; + if (id == 0) + { + const auto & string = std::get(attribute.null_values); + out->insertData(string.data(), string.size()); + continue; + } + + const auto cell_idx = getCellIdx(id); + const auto & cell = cells[cell_idx]; + + if (cell.id != id || hasTimeExpired(cell.expires_at)) + { + found_outdated_values = true; + break; + } + else + { + const auto string_ref = attribute_array[cell_idx]; + out->insertData(string_ref.data, string_ref.size); + } + } + } + + /// optimistic code completed successfully + if (!found_outdated_values) + return; + + /// now onto the pessimistic one, discard possibly partial results from the optimistic path + out->getChars().resize_assume_reserved(0); + out->getOffsets().resize_assume_reserved(0); + + /// outdated ids joined number of times they've been requested + HashMap outdated_ids; + /// we are going to store every string separately + HashMap map; + + std::size_t total_length = 0; + { + const Poco::ScopedReadRWLock read_lock{rw_lock}; + + for (const auto i : ext::range(0, ids.size())) + { + const auto id = ids[i]; + if (id == 0) + { + total_length += 1; + continue; + } + + const auto cell_idx = getCellIdx(id); + const auto & cell = cells[cell_idx]; + + if (cell.id != id || hasTimeExpired(cell.expires_at)) + outdated_ids[id] += 1; + else + { + const auto string_ref = attribute_array[cell_idx]; + map[id] = string_ref; + total_length += string_ref.size + 1; + }; + } + } + + /// request new values + if (!outdated_ids.empty()) + { + std::vector required_ids(outdated_ids.size()); + std::transform(std::begin(outdated_ids), std::end(outdated_ids), std::begin(required_ids), + [] (auto & pair) { return pair.first; }); + + update(required_ids, [&] (const auto id, const auto cell_idx) { + const auto attribute_value = attribute_array[cell_idx]; + + map[id] = attribute_value; + total_length += attribute_value.size + 1; + }); + } + + out->getChars().reserve(total_length); + + for (const auto id : ids) + { + const auto it = map.find(id); + const auto string = it != map.end() ? it->second : std::get(attributes[attribute_idx].null_values); + out->insertData(string.data(), string.size()); + } + } + + template + void update(const std::vector ids, F && on_cell_updated) const + { + auto stream = source_ptr->loadIds(ids); stream->readPrefix(); - auto empty_response = true; + const Poco::ScopedWriteRWLock write_lock{rw_lock}; while (const auto block = stream->read()) { - if (!empty_response) + const auto id_column = typeid_cast *>(block.getByPosition(0).column.get()); + if (!id_column) throw Exception{ - "Stream returned from loadId contains more than one block", - ErrorCodes::LOGICAL_ERROR + "Id column has type different from UInt64.", + ErrorCodes::TYPE_MISMATCH }; - if (block.rowsInFirstColumn() != 1) - throw Exception{ - "Block has more than one row", - ErrorCodes::LOGICAL_ERROR - }; + const auto & ids = id_column->getData(); - for (const auto attribute_idx : ext::range(0, attributes.size())) + for (const auto i : ext::range(0, ids.size())) { - const auto & attribute_column = *block.getByPosition(attribute_idx + 1).column; - auto & attribute = attributes[attribute_idx]; + const auto id = ids[i]; + const auto & cell_idx = getCellIdx(id); + auto & cell = cells[cell_idx]; - setAttributeValue(cell.attrs[attribute_idx], attribute, attribute_column[0]); + for (const auto attribute_idx : ext::range(0, attributes.size())) + { + const auto & attribute_column = *block.getByPosition(attribute_idx + 1).column; + auto & attribute = attributes[attribute_idx]; + + setAttributeValue(attribute, cell_idx, attribute_column[i]); + } + + std::uniform_int_distribution distribution{ + dict_lifetime.min_sec, + dict_lifetime.max_sec + }; + + cell.id = id; + cell.expires_at = std::chrono::system_clock::now() + std::chrono::seconds{distribution(rnd_engine)}; + + on_cell_updated(id, cell_idx); } - - empty_response = false; } stream->readSuffix(); - - if (empty_response) - setCellDefaults(cell); - - cell.id = id; - cell.expires_at = std::chrono::system_clock::now() + std::chrono::seconds{dict_lifetime.min_sec}; } - void setAttributeValue(item & item, const attribute_t & attribute, const Field & value) const + std::uint64_t getCellIdx(const id_t id) const + { + const auto hash = intHash64(id); + const auto idx = hash & (size - 1); + return idx; + } + + void setAttributeValue(attribute_t & attribute, const id_t idx, const Field & value) const { switch (attribute.type) { - case AttributeType::uint8: item.uint8_value = value.get(); break; - case AttributeType::uint16: item.uint16_value = value.get(); break; - case AttributeType::uint32: item.uint32_value = value.get(); break; - case AttributeType::uint64: item.uint64_value = value.get(); break; - case AttributeType::int8: item.int8_value = value.get(); break; - case AttributeType::int16: item.int16_value = value.get(); break; - case AttributeType::int32: item.int32_value = value.get(); break; - case AttributeType::int64: item.int64_value = value.get(); break; - case AttributeType::float32: item.float32_value = value.get(); break; - case AttributeType::float64: item.float64_value = value.get(); break; + case AttributeType::uint8: std::get>(attribute.arrays)[idx] = value.get(); break; + case AttributeType::uint16: std::get>(attribute.arrays)[idx] = value.get(); break; + case AttributeType::uint32: std::get>(attribute.arrays)[idx] = value.get(); break; + case AttributeType::uint64: std::get>(attribute.arrays)[idx] = value.get(); break; + case AttributeType::int8: std::get>(attribute.arrays)[idx] = value.get(); break; + case AttributeType::int16: std::get>(attribute.arrays)[idx] = value.get(); break; + case AttributeType::int32: std::get>(attribute.arrays)[idx] = value.get(); break; + case AttributeType::int64: std::get>(attribute.arrays)[idx] = value.get(); break; + case AttributeType::float32: std::get>(attribute.arrays)[idx] = value.get(); break; + case AttributeType::float64: std::get>(attribute.arrays)[idx] = value.get(); break; case AttributeType::string: { const auto & string = value.get(); - auto & string_ref = item.string_value; - if (string_ref.data && string_ref.data != attribute.string_null_value.data()) + auto & string_ref = std::get>(attribute.arrays)[idx]; + if (string_ref.data) delete[] string_ref.data; const auto size = string.size(); @@ -340,39 +486,6 @@ private: } } - void setCellDefaults(cell & cell) const - { - for (const auto attribute_idx : ext::range(0, attributes.size())) - { - auto & attribute = attributes[attribute_idx]; - auto & item = cell.attrs[attribute_idx]; - - switch (attribute.type) - { - case AttributeType::uint8: item.uint8_value = attribute.uint8_null_value; break; - case AttributeType::uint16: item.uint16_value = attribute.uint16_null_value; break; - case AttributeType::uint32: item.uint32_value = attribute.uint32_null_value; break; - case AttributeType::uint64: item.uint64_value = attribute.uint64_null_value; break; - case AttributeType::int8: item.int8_value = attribute.int8_null_value; break; - case AttributeType::int16: item.int16_value = attribute.int16_null_value; break; - case AttributeType::int32: item.int32_value = attribute.int32_null_value; break; - case AttributeType::int64: item.int64_value = attribute.int64_null_value; break; - case AttributeType::float32: item.float32_value = attribute.float32_null_value; break; - case AttributeType::float64: item.float64_value = attribute.float64_null_value; break; - case AttributeType::string: - { - auto & string_ref = item.string_value; - if (string_ref.data && string_ref.data != attribute.string_null_value.data()) - delete[] string_ref.data; - - string_ref = attribute.string_null_value; - - break; - } - } - } - } - std::size_t getAttributeIndex(const std::string & attribute_name) const { const auto it = attribute_index_by_name.find(attribute_name); @@ -411,23 +524,14 @@ private: const DictionarySourcePtr source_ptr; const DictionaryLifetime dict_lifetime; + mutable Poco::RWLock rw_lock; const std::size_t size; - mutable std::vector cells; std::map attribute_index_by_name; - std::vector attributes; + mutable std::vector attributes; + mutable std::vector cells; const attribute_t * hierarchical_attribute = nullptr; + + mutable std::mt19937_64 rnd_engine{getSeed()}; }; -template <> inline UInt8 CacheDictionary::item::get() const { return uint8_value; } -template <> inline UInt16 CacheDictionary::item::get() const { return uint16_value; } -template <> inline UInt32 CacheDictionary::item::get() const { return uint32_value; } -template <> inline UInt64 CacheDictionary::item::get() const { return uint64_value; } -template <> inline Int8 CacheDictionary::item::get() const { return int8_value; } -template <> inline Int16 CacheDictionary::item::get() const { return int16_value; } -template <> inline Int32 CacheDictionary::item::get() const { return int32_value; } -template <> inline Int64 CacheDictionary::item::get() const { return int64_value; } -template <> inline Float32 CacheDictionary::item::get() const { return float32_value; } -template <> inline Float64 CacheDictionary::item::get() const { return float64_value; } -template <> inline StringRef CacheDictionary::item::get() const { return string_value; } - } diff --git a/dbms/include/DB/Dictionaries/ClickHouseDictionarySource.h b/dbms/include/DB/Dictionaries/ClickHouseDictionarySource.h index 1646d6b319b..df6f42cc888 100644 --- a/dbms/include/DB/Dictionaries/ClickHouseDictionarySource.h +++ b/dbms/include/DB/Dictionaries/ClickHouseDictionarySource.h @@ -60,23 +60,15 @@ public: return new RemoteBlockInputStream{pool.get(), load_all_query, nullptr}; } - BlockInputStreamPtr loadId(const std::uint64_t id) override + BlockInputStreamPtr loadIds(const std::vector ids) override { - const auto query = composeLoadIdQuery(id); + const auto query = composeLoadIdsQuery(ids); if (is_local) return executeQuery(query, context, true).in; return new RemoteBlockInputStream{pool.get(), query, nullptr}; } - BlockInputStreamPtr loadIds(const std::vector ids) override - { - throw Exception{ - "Method unsupported", - ErrorCodes::NOT_IMPLEMENTED - }; - } - bool isModified() const override { return true; } bool supportsSelectiveLoad() const override { return true; } @@ -103,7 +95,7 @@ private: return query; } - std::string composeLoadIdQuery(const id_t id) + std::string composeLoadIdsQuery(const std::vector ids) { std::string query{"SELECT "}; @@ -113,13 +105,25 @@ private: if (!first) query += ", "; - query += sample_block.getByPosition(idx).name; first = false; + query += sample_block.getByPosition(idx).name; } const auto & id_column_name = sample_block.getByPosition(0).name; - query += " FROM " + table + " WHERE " + id_column_name + " IN (" + std::to_string(id) + ");"; + query += " FROM " + table + " WHERE " + id_column_name + " IN ("; + + first = true; + for (const auto id : ids) + { + if (!first) + query += ','; + + first = false; + query += toString(id); + } + + query += ");"; return query; } diff --git a/dbms/include/DB/Dictionaries/FileDictionarySource.h b/dbms/include/DB/Dictionaries/FileDictionarySource.h index 52659058b36..bcf42210cbb 100644 --- a/dbms/include/DB/Dictionaries/FileDictionarySource.h +++ b/dbms/include/DB/Dictionaries/FileDictionarySource.h @@ -38,14 +38,6 @@ public: return new OwningBufferBlockInputStream{stream, std::move(in_ptr)}; } - BlockInputStreamPtr loadId(const std::uint64_t id) override - { - throw Exception{ - "Method unsupported", - ErrorCodes::NOT_IMPLEMENTED - }; - } - BlockInputStreamPtr loadIds(const std::vector ids) override { throw Exception{ diff --git a/dbms/include/DB/Dictionaries/FlatDictionary.h b/dbms/include/DB/Dictionaries/FlatDictionary.h index f4029052a8e..52528e9e487 100644 --- a/dbms/include/DB/Dictionaries/FlatDictionary.h +++ b/dbms/include/DB/Dictionaries/FlatDictionary.h @@ -69,8 +69,8 @@ public: }; } -#define DECLARE_INDIVIDUAL_GETTER(TYPE, NAME, LC_TYPE) \ - TYPE get##NAME(const std::string & attribute_name, const id_t id) const override\ +#define DECLARE_INDIVIDUAL_GETTER(TYPE, LC_TYPE) \ + TYPE get##TYPE(const std::string & attribute_name, const id_t id) const override\ {\ const auto idx = getAttributeIndex(attribute_name);\ const auto & attribute = attributes[idx];\ @@ -83,17 +83,17 @@ public: return (*attribute.LC_TYPE##_array)[id];\ return attribute.LC_TYPE##_null_value;\ } - DECLARE_INDIVIDUAL_GETTER(UInt8, UInt8, uint8) - DECLARE_INDIVIDUAL_GETTER(UInt16, UInt16, uint16) - DECLARE_INDIVIDUAL_GETTER(UInt32, UInt32, uint32) - DECLARE_INDIVIDUAL_GETTER(UInt64, UInt64, uint64) - DECLARE_INDIVIDUAL_GETTER(Int8, Int8, int8) - DECLARE_INDIVIDUAL_GETTER(Int16, Int16, int16) - DECLARE_INDIVIDUAL_GETTER(Int32, Int32, int32) - DECLARE_INDIVIDUAL_GETTER(Int64, Int64, int64) - DECLARE_INDIVIDUAL_GETTER(Float32, Float32, float32) - DECLARE_INDIVIDUAL_GETTER(Float64, Float64, float64) - DECLARE_INDIVIDUAL_GETTER(StringRef, String, string) + DECLARE_INDIVIDUAL_GETTER(UInt8, uint8) + DECLARE_INDIVIDUAL_GETTER(UInt16, uint16) + DECLARE_INDIVIDUAL_GETTER(UInt32, uint32) + DECLARE_INDIVIDUAL_GETTER(UInt64, uint64) + DECLARE_INDIVIDUAL_GETTER(Int8, int8) + DECLARE_INDIVIDUAL_GETTER(Int16, int16) + DECLARE_INDIVIDUAL_GETTER(Int32, int32) + DECLARE_INDIVIDUAL_GETTER(Int64, int64) + DECLARE_INDIVIDUAL_GETTER(Float32, float32) + DECLARE_INDIVIDUAL_GETTER(Float64, float64) + DECLARE_INDIVIDUAL_GETTER(String, string) #undef DECLARE_INDIVIDUAL_GETTER #define DECLARE_MULTIPLE_GETTER(TYPE, LC_TYPE)\ @@ -143,7 +143,7 @@ public: for (const auto i : ext::range(0, ids.size())) { const auto id = ids[i]; - const auto string_ref = id < attr.size() ? attr[id] : null_value; + const auto string_ref = id < attr.size() ? attr[id] : StringRef{null_value}; out->insertData(string_ref.data, string_ref.size); } } @@ -184,8 +184,8 @@ private: for (const auto & attribute : dict_struct.attributes) { attribute_index_by_name.emplace(attribute.name, attributes.size()); - attributes.push_back(std::move(createAttributeWithType(getAttributeTypeByName(attribute.type), - attribute.null_value))); + attributes.push_back(createAttributeWithType(getAttributeTypeByName(attribute.type), + attribute.null_value)); if (attribute.hierarchical) hierarchical_attribute = &attributes.back(); diff --git a/dbms/include/DB/Dictionaries/HashedDictionary.h b/dbms/include/DB/Dictionaries/HashedDictionary.h index ef7839b817a..c46c7d94766 100644 --- a/dbms/include/DB/Dictionaries/HashedDictionary.h +++ b/dbms/include/DB/Dictionaries/HashedDictionary.h @@ -99,8 +99,8 @@ public: }; } -#define DECLARE_INDIVIDUAL_GETTER(TYPE, NAME, LC_TYPE) \ - TYPE get##NAME(const std::string & attribute_name, const id_t id) const override\ +#define DECLARE_INDIVIDUAL_GETTER(TYPE, LC_TYPE) \ + TYPE get##TYPE(const std::string & attribute_name, const id_t id) const override\ {\ const auto idx = getAttributeIndex(attribute_name);\ const auto & attribute = attributes[idx];\ @@ -116,17 +116,17 @@ public: \ return attribute.LC_TYPE##_null_value;\ } - DECLARE_INDIVIDUAL_GETTER(UInt8, UInt8, uint8) - DECLARE_INDIVIDUAL_GETTER(UInt16, UInt16, uint16) - DECLARE_INDIVIDUAL_GETTER(UInt32, UInt32, uint32) - DECLARE_INDIVIDUAL_GETTER(UInt64, UInt64, uint64) - DECLARE_INDIVIDUAL_GETTER(Int8, Int8, int8) - DECLARE_INDIVIDUAL_GETTER(Int16, Int16, int16) - DECLARE_INDIVIDUAL_GETTER(Int32, Int32, int32) - DECLARE_INDIVIDUAL_GETTER(Int64, Int64, int64) - DECLARE_INDIVIDUAL_GETTER(Float32, Float32, float32) - DECLARE_INDIVIDUAL_GETTER(Float64, Float64, float64) - DECLARE_INDIVIDUAL_GETTER(StringRef, String, string) + DECLARE_INDIVIDUAL_GETTER(UInt8, uint8) + DECLARE_INDIVIDUAL_GETTER(UInt16, uint16) + DECLARE_INDIVIDUAL_GETTER(UInt32, uint32) + DECLARE_INDIVIDUAL_GETTER(UInt64, uint64) + DECLARE_INDIVIDUAL_GETTER(Int8, int8) + DECLARE_INDIVIDUAL_GETTER(Int16, int16) + DECLARE_INDIVIDUAL_GETTER(Int32, int32) + DECLARE_INDIVIDUAL_GETTER(Int64, int64) + DECLARE_INDIVIDUAL_GETTER(Float32, float32) + DECLARE_INDIVIDUAL_GETTER(Float64, float64) + DECLARE_INDIVIDUAL_GETTER(String, string) #undef DECLARE_INDIVIDUAL_GETTER #define DECLARE_MULTIPLE_GETTER(TYPE, LC_TYPE)\ @@ -176,7 +176,7 @@ public: for (const auto i : ext::range(0, ids.size())) { const auto it = attr.find(ids[i]); - const auto string_ref = it != attr.end() ? it->second : null_value; + const auto string_ref = it != attr.end() ? it->second : StringRef{null_value}; out->insertData(string_ref.data, string_ref.size); } } @@ -217,8 +217,8 @@ private: for (const auto & attribute : dict_struct.attributes) { attribute_index_by_name.emplace(attribute.name, attributes.size()); - attributes.push_back(std::move(createAttributeWithType(getAttributeTypeByName(attribute.type), - attribute.null_value))); + attributes.push_back(createAttributeWithType(getAttributeTypeByName(attribute.type), + attribute.null_value)); if (attribute.hierarchical) hierarchical_attribute = &attributes.back(); diff --git a/dbms/include/DB/Dictionaries/IDictionary.h b/dbms/include/DB/Dictionaries/IDictionary.h index d7712ac6f10..d16d6d92c8b 100644 --- a/dbms/include/DB/Dictionaries/IDictionary.h +++ b/dbms/include/DB/Dictionaries/IDictionary.h @@ -58,7 +58,7 @@ public: virtual Int64 getInt64(const std::string & attribute_name, id_t id) const = 0; virtual Float32 getFloat32(const std::string & attribute_name, id_t id) const = 0; virtual Float64 getFloat64(const std::string & attribute_name, id_t id) const = 0; - virtual StringRef getString(const std::string & attribute_name, id_t id) const = 0; + virtual String getString(const std::string & attribute_name, id_t id) const = 0; /// functions for multiple access virtual void getUInt8(const std::string & attr_name, const PODArray & ids, PODArray & out) const = 0; diff --git a/dbms/include/DB/Dictionaries/IDictionarySource.h b/dbms/include/DB/Dictionaries/IDictionarySource.h index 3a0eea143db..9e98c6ce0ae 100644 --- a/dbms/include/DB/Dictionaries/IDictionarySource.h +++ b/dbms/include/DB/Dictionaries/IDictionarySource.h @@ -24,9 +24,6 @@ public: */ virtual bool supportsSelectiveLoad() const = 0; - /// returns an input stream with the data for the requested identifier - virtual BlockInputStreamPtr loadId(const std::uint64_t id) = 0; - /// returns an input stream with the data for a collection of identifiers virtual BlockInputStreamPtr loadIds(const std::vector ids) = 0; diff --git a/dbms/include/DB/Dictionaries/MySQLDictionarySource.h b/dbms/include/DB/Dictionaries/MySQLDictionarySource.h index dd1f49aca75..6c7b16ed3f8 100644 --- a/dbms/include/DB/Dictionaries/MySQLDictionarySource.h +++ b/dbms/include/DB/Dictionaries/MySQLDictionarySource.h @@ -39,14 +39,6 @@ public: return new MySQLBlockInputStream{pool.Get()->query(load_all_query), sample_block, max_block_size}; } - BlockInputStreamPtr loadId(const std::uint64_t id) override - { - throw Exception{ - "Method unsupported", - ErrorCodes::NOT_IMPLEMENTED - }; - } - BlockInputStreamPtr loadIds(const std::vector ids) override { throw Exception{ diff --git a/dbms/include/DB/Functions/FunctionsDictionaries.h b/dbms/include/DB/Functions/FunctionsDictionaries.h index d517f5d2755..e9125da5e51 100644 --- a/dbms/include/DB/Functions/FunctionsDictionaries.h +++ b/dbms/include/DB/Functions/FunctionsDictionaries.h @@ -779,7 +779,7 @@ private: { throw Exception{ "Illegal type " + arguments[2]->getName() + " of third argument of function " + getName() - + ", muste be UInt64.", + + ", must be UInt64.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT }; } @@ -836,7 +836,7 @@ private: { block.getByPosition(result).column = new ColumnConst{ id_col->size(), - dictionary->getString(attr_name, id_col->getData()).toString() + dictionary->getString(attr_name, id_col->getData()) }; } else diff --git a/dbms/src/Interpreters/DictionaryFactory.cpp b/dbms/src/Interpreters/DictionaryFactory.cpp index 8f4c3b21cb0..d36926a0c29 100644 --- a/dbms/src/Interpreters/DictionaryFactory.cpp +++ b/dbms/src/Interpreters/DictionaryFactory.cpp @@ -39,6 +39,17 @@ DictionaryPtr DictionaryFactory::create(const std::string & name, Poco::Util::Ab { return std::make_unique(name, dict_struct, std::move(source_ptr), dict_lifetime); } + else if ("cache" == layout_type) + { + const auto size = config.getInt(layout_prefix + ".cache.size"); + if (size == 0) + throw Exception{ + "Dictionary of type 'cache' cannot have size of 0 bytes", + ErrorCodes::TOO_SMALL_BUFFER_SIZE + }; + + return std::make_unique(name, dict_struct, std::move(source_ptr), dict_lifetime, size); + } throw Exception{ "Unknown dictionary layout type: " + layout_type,