From 093353d0adf01cf808f44f3f98b3af5afe6214f9 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 5 Aug 2016 00:40:20 +0300 Subject: [PATCH] Blocks simplification [#METR-22173]. --- dbms/include/DB/Core/Block.h | 50 ++++++------- dbms/src/Core/Block.cpp | 141 ++++++++++++++++------------------- 2 files changed, 87 insertions(+), 104 deletions(-) diff --git a/dbms/include/DB/Core/Block.h b/dbms/include/DB/Core/Block.h index 452e87c649c..29fc13ecf9c 100644 --- a/dbms/include/DB/Core/Block.h +++ b/dbms/include/DB/Core/Block.h @@ -2,14 +2,13 @@ #include #include -#include #include #include -#include #include -#include +#include #include +#include namespace DB @@ -24,15 +23,12 @@ class Context; class Block { -public: - using Container_t = std::list; - using IndexByPosition_t = std::vector; - using IndexByName_t = std::map; - private: - Container_t data; - IndexByPosition_t index_by_position; - IndexByName_t index_by_name; + using Container = std::vector; + using IndexByName = std::map; + + Container data; + IndexByName index_by_name; public: BlockInfo info; @@ -40,26 +36,23 @@ public: Block() = default; Block(std::initializer_list il) : data{il} { - index_by_position.reserve(il.size()); - for (auto it = std::begin(data); it != std::end(data); ++it) + size_t i = 0; + for (const auto & elem : il) { - index_by_name[it->name] = it; - index_by_position.push_back(it); + index_by_name[elem.name] = i; + ++i; } } - /// нужны, чтобы правильно скопировались индексы - Block(const Block & other); - Block(Block && other) = default; - Block & operator= (const Block & other); - Block & operator= (Block && other) = default; - /// вставить столбец в заданную позицию void insert(size_t position, const ColumnWithTypeAndName & elem); + void insert(size_t position, ColumnWithTypeAndName && elem); /// вставить столбец в конец void insert(const ColumnWithTypeAndName & elem); + void insert(ColumnWithTypeAndName && elem); /// вставить столбец в конец, если столбца с таким именем ещё нет void insertUnique(const ColumnWithTypeAndName & elem); + void insertUnique(ColumnWithTypeAndName && elem); /// удалить столбец в заданной позиции void erase(size_t position); /// удалить столбец с заданным именем @@ -67,11 +60,13 @@ public: /// Добавляет в блок недостающие столбцы со значениями по-умолчанию void addDefaults(const NamesAndTypesList & required_columns); + /// References are invalidated after calling functions above. + ColumnWithTypeAndName & getByPosition(size_t position); const ColumnWithTypeAndName & getByPosition(size_t position) const; - ColumnWithTypeAndName & unsafeGetByPosition(size_t position) { return *index_by_position[position]; } - const ColumnWithTypeAndName & unsafeGetByPosition(size_t position) const { return *index_by_position[position]; } + ColumnWithTypeAndName & unsafeGetByPosition(size_t position) { return data[position]; } + const ColumnWithTypeAndName & unsafeGetByPosition(size_t position) const { return data[position]; } ColumnWithTypeAndName & getByName(const std::string & name); const ColumnWithTypeAndName & getByName(const std::string & name) const; @@ -92,13 +87,13 @@ public: */ size_t rowsInFirstColumn() const; - size_t columns() const { return index_by_position.size(); } + size_t columns() const { return data.size(); } /// Приблизительное количество байт в оперативке - для профайлинга. size_t bytes() const; - operator bool() const { return !index_by_position.empty(); } - bool operator!() const { return index_by_position.empty(); } + operator bool() const { return !data.empty(); } + bool operator!() const { return data.empty(); } /** Получить список имён столбцов через запятую. */ std::string dumpNames() const; @@ -121,6 +116,9 @@ public: void clear(); void swap(Block & other) noexcept; + +private: + void eraseImpl(size_t position); }; using Blocks = std::vector; diff --git a/dbms/src/Core/Block.cpp b/dbms/src/Core/Block.cpp index 4c1f3d8f6d8..459542717d2 100644 --- a/dbms/src/Core/Block.cpp +++ b/dbms/src/Core/Block.cpp @@ -28,12 +28,6 @@ namespace ErrorCodes } -Block::Block(const Block & other) -{ - *this = other; -} - - void Block::addDefaults(const NamesAndTypesList & required_columns) { /// Для недостающих столбцов из вложенной структуры нужно создавать не столбец пустых массивов, а столбец массивов правильных длин. @@ -90,52 +84,38 @@ void Block::addDefaults(const NamesAndTypesList & required_columns) } } -Block & Block::operator= (const Block & other) -{ - info = other.info; - data = other.data; - - index_by_position.resize(data.size()); - index_by_name.clear(); - - size_t pos = 0; - for (Container_t::iterator it = data.begin(); it != data.end(); ++it, ++pos) - { - index_by_position[pos] = it; - index_by_name[it->name] = it; - } - - return *this; -} void Block::insert(size_t position, const ColumnWithTypeAndName & elem) { - if (position > index_by_position.size()) + if (position > data.size()) throw Exception("Position out of bound in Block::insert(), max position = " - + toString(index_by_position.size()), ErrorCodes::POSITION_OUT_OF_BOUND); + + toString(data.size()), ErrorCodes::POSITION_OUT_OF_BOUND); - if (position == index_by_position.size()) - { - insert(elem); - return; - } + index_by_name[elem.name] = position; + data.emplace(data.begin() + position, elem); +} - Container_t::iterator it = data.insert(index_by_position[position], elem); - index_by_name[elem.name] = it; +void Block::insert(size_t position, ColumnWithTypeAndName && elem) +{ + if (position > data.size()) + throw Exception("Position out of bound in Block::insert(), max position = " + + toString(data.size()), ErrorCodes::POSITION_OUT_OF_BOUND); - index_by_position.resize(index_by_position.size() + 1); - for (size_t i = index_by_position.size() - 1; i > position; --i) - index_by_position[i] = index_by_position[i - 1]; - - index_by_position[position] = it; + index_by_name[elem.name] = position; + data.emplace(data.begin() + position, std::move(elem)); } void Block::insert(const ColumnWithTypeAndName & elem) { - Container_t::iterator it = data.insert(data.end(), elem); - index_by_name[elem.name] = it; - index_by_position.push_back(it); + index_by_name[elem.name] = data.size(); + data.emplace_back(elem); +} + +void Block::insert(ColumnWithTypeAndName && elem) +{ + index_by_name[elem.name] = data.size(); + data.emplace_back(std::move(elem)); } @@ -145,97 +125,104 @@ void Block::insertUnique(const ColumnWithTypeAndName & elem) insert(elem); } +void Block::insertUnique(ColumnWithTypeAndName && elem) +{ + if (index_by_name.end() == index_by_name.find(elem.name)) + insert(std::move(elem)); +} + void Block::erase(size_t position) { - if (index_by_position.empty()) + if (data.empty()) throw Exception("Block is empty", ErrorCodes::POSITION_OUT_OF_BOUND); - if (position >= index_by_position.size()) + if (position >= data.size()) throw Exception("Position out of bound in Block::erase(), max position = " - + toString(index_by_position.size() - 1), ErrorCodes::POSITION_OUT_OF_BOUND); + + toString(data.size() - 1), ErrorCodes::POSITION_OUT_OF_BOUND); - Container_t::iterator it = index_by_position[position]; - auto index_by_name_it = index_by_name.find(it->name); - if (index_by_name.end() != index_by_name_it) - index_by_name.erase(index_by_name_it); - data.erase(it); + eraseImpl(position); +} - for (size_t i = position, size = index_by_position.size() - 1; i < size; ++i) - index_by_position[i] = index_by_position[i + 1]; - index_by_position.resize(index_by_position.size() - 1); +void Block::eraseImpl(size_t position) +{ + data.erase(data.begin() + position); + + for (auto it = index_by_name.begin(); it != index_by_name.end();) + { + if (it->second == position) + index_by_name.erase(it++); + else + { + if (it->second > position) + --it->second; + ++it; + } + } } void Block::erase(const String & name) { - IndexByName_t::iterator index_it = index_by_name.find(name); + auto index_it = index_by_name.find(name); if (index_it == index_by_name.end()) throw Exception("No such name in Block::erase(): '" + name + "'", ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK); - Container_t::iterator it = index_it->second; - index_by_name.erase(index_it); - size_t position = std::distance(data.begin(), it); - data.erase(it); - - for (size_t i = position, size = index_by_position.size() - 1; i < size; ++i) - index_by_position[i] = index_by_position[i + 1]; - - index_by_position.resize(index_by_position.size() - 1); + eraseImpl(index_it->second); } ColumnWithTypeAndName & Block::getByPosition(size_t position) { - if (index_by_position.empty()) + if (data.empty()) throw Exception("Block is empty", ErrorCodes::POSITION_OUT_OF_BOUND); - if (position >= index_by_position.size()) + if (position >= data.size()) throw Exception("Position " + toString(position) + " is out of bound in Block::getByPosition(), max position = " - + toString(index_by_position.size() - 1) + + toString(data.size() - 1) + ", there are columns: " + dumpNames(), ErrorCodes::POSITION_OUT_OF_BOUND); - return *index_by_position[position]; + return data[position]; } const ColumnWithTypeAndName & Block::getByPosition(size_t position) const { - if (index_by_position.empty()) + if (data.empty()) throw Exception("Block is empty", ErrorCodes::POSITION_OUT_OF_BOUND); - if (position >= index_by_position.size()) + if (position >= data.size()) throw Exception("Position " + toString(position) + " is out of bound in Block::getByPosition(), max position = " - + toString(index_by_position.size() - 1) + + toString(data.size() - 1) + ", there are columns: " + dumpNames(), ErrorCodes::POSITION_OUT_OF_BOUND); - return *index_by_position[position]; + return data[position]; } ColumnWithTypeAndName & Block::getByName(const std::string & name) { - IndexByName_t::const_iterator it = index_by_name.find(name); + auto it = index_by_name.find(name); if (index_by_name.end() == it) throw Exception("Not found column " + name + " in block. There are only columns: " + dumpNames() , ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK); - return *it->second; + return data[it->second]; } const ColumnWithTypeAndName & Block::getByName(const std::string & name) const { - IndexByName_t::const_iterator it = index_by_name.find(name); + auto it = index_by_name.find(name); if (index_by_name.end() == it) throw Exception("Not found column " + name + " in block. There are only columns: " + dumpNames() , ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK); - return *it->second; + return data[it->second]; } @@ -247,12 +234,12 @@ bool Block::has(const std::string & name) const size_t Block::getPositionByName(const std::string & name) const { - IndexByName_t::const_iterator it = index_by_name.find(name); + auto it = index_by_name.find(name); if (index_by_name.end() == it) throw Exception("Not found column " + name + " in block. There are only columns: " + dumpNames() , ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK); - return std::distance(const_cast(data).begin(), it->second); + return it->second; } @@ -346,7 +333,7 @@ Block Block::sortColumns() const Block sorted_block; for (const auto & name : index_by_name) - sorted_block.insert(*name.second); + sorted_block.insert(data[name.second]); return sorted_block; } @@ -446,7 +433,6 @@ void Block::clear() info = BlockInfo(); data.clear(); index_by_name.clear(); - index_by_position.clear(); } void Block::swap(Block & other) noexcept @@ -454,7 +440,6 @@ void Block::swap(Block & other) noexcept std::swap(info, other.info); data.swap(other.data); index_by_name.swap(other.index_by_name); - index_by_position.swap(other.index_by_position); } }