Blocks simplification [#METR-22173].

This commit is contained in:
Alexey Milovidov 2016-08-05 00:40:20 +03:00
parent ca044bf0de
commit 093353d0ad
2 changed files with 87 additions and 104 deletions

View File

@ -2,14 +2,13 @@
#include <vector>
#include <map>
#include <list>
#include <initializer_list>
#include <DB/Core/BlockInfo.h>
#include <DB/Core/ColumnWithTypeAndName.h>
#include <DB/Core/NamesAndTypes.h>
#include <DB/Common/Exception.h>
#include <DB/Core/ColumnWithTypeAndName.h>
#include <DB/Core/ColumnsWithTypeAndName.h>
#include <DB/Common/Exception.h>
namespace DB
@ -24,15 +23,12 @@ class Context;
class Block
{
public:
using Container_t = std::list<ColumnWithTypeAndName>;
using IndexByPosition_t = std::vector<Container_t::iterator>;
using IndexByName_t = std::map<String, Container_t::iterator>;
private:
Container_t data;
IndexByPosition_t index_by_position;
IndexByName_t index_by_name;
using Container = std::vector<ColumnWithTypeAndName>;
using IndexByName = std::map<String, size_t>;
Container data;
IndexByName index_by_name;
public:
BlockInfo info;
@ -40,26 +36,23 @@ public:
Block() = default;
/// Builds a block from a list of columns.
/// Each column name is indexed by the position of the column in the list;
/// if a name occurs more than once, the last occurrence is the one recorded in the index.
Block(std::initializer_list<ColumnWithTypeAndName> il) : data{il}
{
    size_t i = 0;
    for (const auto & elem : il)
    {
        index_by_name[elem.name] = i;
        ++i;
    }
}
/// нужны, чтобы правильно скопировались индексы
Block(const Block & other);
Block(Block && other) = default;
Block & operator= (const Block & other);
Block & operator= (Block && other) = default;
/// вставить столбец в заданную позицию
void insert(size_t position, const ColumnWithTypeAndName & elem);
void insert(size_t position, ColumnWithTypeAndName && elem);
/// вставить столбец в конец
void insert(const ColumnWithTypeAndName & elem);
void insert(ColumnWithTypeAndName && elem);
/// вставить столбец в конец, если столбца с таким именем ещё нет
void insertUnique(const ColumnWithTypeAndName & elem);
void insertUnique(ColumnWithTypeAndName && elem);
/// удалить столбец в заданной позиции
void erase(size_t position);
/// удалить столбец с заданным именем
@ -67,11 +60,13 @@ public:
/// Добавляет в блок недостающие столбцы со значениями по-умолчанию
void addDefaults(const NamesAndTypesList & required_columns);
/// References are invalidated after calling functions above.
ColumnWithTypeAndName & getByPosition(size_t position);
const ColumnWithTypeAndName & getByPosition(size_t position) const;
/// Access a column by position without any bounds check (plain vector indexing).
ColumnWithTypeAndName & unsafeGetByPosition(size_t position) { return data[position]; }
const ColumnWithTypeAndName & unsafeGetByPosition(size_t position) const { return data[position]; }
ColumnWithTypeAndName & getByName(const std::string & name);
const ColumnWithTypeAndName & getByName(const std::string & name) const;
@ -92,13 +87,13 @@ public:
*/
size_t rowsInFirstColumn() const;
/// Number of columns currently stored in the block.
size_t columns() const { return data.size(); }
/// Приблизительное количество байт в оперативке - для профайлинга.
size_t bytes() const;
/// A block converts to true when it holds at least one column; operator! is the exact negation.
operator bool() const { return !data.empty(); }
bool operator!() const { return data.empty(); }
/** Получить список имён столбцов через запятую. */
std::string dumpNames() const;
@ -121,6 +116,9 @@ public:
void clear();
void swap(Block & other) noexcept;
private:
void eraseImpl(size_t position);
};
using Blocks = std::vector<Block>;

View File

@ -28,12 +28,6 @@ namespace ErrorCodes
}
/// Copy constructor: forwards to the copy assignment operator.
Block::Block(const Block & other)
{
    operator=(other);
}
void Block::addDefaults(const NamesAndTypesList & required_columns)
{
/// Для недостающих столбцов из вложенной структуры нужно создавать не столбец пустых массивов, а столбец массивов правильных длин.
@ -90,52 +84,38 @@ void Block::addDefaults(const NamesAndTypesList & required_columns)
}
}
/// Copy assignment: copies the block info and the columns, then rebuilds both
/// indices from iterators into this block's own `data`.
Block & Block::operator= (const Block & other)
{
    info = other.info;
    data = other.data;

    index_by_name.clear();
    index_by_position.clear();
    index_by_position.reserve(data.size());

    for (auto column_it = data.begin(); column_it != data.end(); ++column_it)
    {
        index_by_position.push_back(column_it);
        index_by_name[column_it->name] = column_it;
    }

    return *this;
}
/// Insert a copy of `elem` so that it ends up at index `position`.
/// `position == columns()` is allowed and appends to the end.
/// Throws Exception(POSITION_OUT_OF_BOUND) if `position` is greater than the number of columns.
void Block::insert(size_t position, const ColumnWithTypeAndName & elem)
{
    if (position > data.size())
        throw Exception("Position out of bound in Block::insert(), max position = "
            + toString(data.size()), ErrorCodes::POSITION_OUT_OF_BOUND);

    /// Every column currently at `position` or later moves one slot to the right in the vector,
    /// so the positions stored in the name index must move with them. Without this step,
    /// lookups by name would resolve those columns to their left neighbour.
    for (auto & name_pos : index_by_name)
        if (name_pos.second >= position)
            ++name_pos.second;

    index_by_name[elem.name] = position;
    data.emplace(data.begin() + position, elem);
}
/// Move `elem` into the block so that it ends up at index `position`.
/// `position == columns()` is allowed and appends to the end.
/// Throws Exception(POSITION_OUT_OF_BOUND) if `position` is greater than the number of columns.
void Block::insert(size_t position, ColumnWithTypeAndName && elem)
{
    if (position > data.size())
        throw Exception("Position out of bound in Block::insert(), max position = "
            + toString(data.size()), ErrorCodes::POSITION_OUT_OF_BOUND);

    /// Every column currently at `position` or later moves one slot to the right in the vector,
    /// so the positions stored in the name index must move with them. Without this step,
    /// lookups by name would resolve those columns to their left neighbour.
    for (auto & name_pos : index_by_name)
        if (name_pos.second >= position)
            ++name_pos.second;

    /// The name is read before `elem` is moved from.
    index_by_name[elem.name] = position;
    data.emplace(data.begin() + position, std::move(elem));
}
/// Append a copy of `elem` as the last column and record its position under its name.
void Block::insert(const ColumnWithTypeAndName & elem)
{
    /// data.size() before the append is exactly the index the new column will occupy.
    index_by_name[elem.name] = data.size();
    data.emplace_back(elem);
}
/// Append `elem` as the last column, moving it into the block.
void Block::insert(ColumnWithTypeAndName && elem)
{
    /// The current size is the index the new column is about to occupy.
    const size_t new_position = data.size();

    /// Register the name first: `elem` is moved from on the next line.
    index_by_name[elem.name] = new_position;
    data.push_back(std::move(elem));
}
@ -145,97 +125,104 @@ void Block::insertUnique(const ColumnWithTypeAndName & elem)
insert(elem);
}
/// Append `elem` (by move) only if no column with the same name is indexed yet;
/// otherwise leave the block and `elem` untouched.
void Block::insertUnique(ColumnWithTypeAndName && elem)
{
    if (index_by_name.count(elem.name) != 0)
        return;

    insert(std::move(elem));
}
/// Remove the column at `position`.
/// Throws Exception(POSITION_OUT_OF_BOUND) if the block is empty or `position` is not a valid index.
void Block::erase(size_t position)
{
    /// Checked separately so that `data.size() - 1` below cannot wrap around.
    if (data.empty())
        throw Exception("Block is empty", ErrorCodes::POSITION_OUT_OF_BOUND);

    if (position >= data.size())
        throw Exception("Position out of bound in Block::erase(), max position = "
            + toString(data.size() - 1), ErrorCodes::POSITION_OUT_OF_BOUND);

    eraseImpl(position);
}
/// Remove the column at `position` from `data` and repair the name index:
/// entries that pointed at the removed column are dropped, and entries that
/// pointed past it are shifted down by one. Performs no bounds check itself.
void Block::eraseImpl(size_t position)
{
    data.erase(data.begin() + position);

    auto entry = index_by_name.begin();
    while (entry != index_by_name.end())
    {
        if (entry->second == position)
        {
            /// erase() hands back the iterator following the removed entry.
            entry = index_by_name.erase(entry);
            continue;
        }

        if (entry->second > position)
            --entry->second;

        ++entry;
    }
}
/// Remove the column registered under `name`.
/// Throws Exception(NOT_FOUND_COLUMN_IN_BLOCK) if the name is not in the index.
void Block::erase(const String & name)
{
    auto index_it = index_by_name.find(name);
    if (index_it == index_by_name.end())
        throw Exception("No such name in Block::erase(): '"
            + name + "'", ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK);

    /// eraseImpl takes the position by value, so it is safe that the index entry
    /// behind `index_it` gets erased inside the call.
    eraseImpl(index_it->second);
}
/// Bounds-checked access to the column at `position`.
/// Throws Exception(POSITION_OUT_OF_BOUND) if the block is empty or `position` is not a valid index.
ColumnWithTypeAndName & Block::getByPosition(size_t position)
{
    /// Checked separately so that `data.size() - 1` below cannot wrap around.
    if (data.empty())
        throw Exception("Block is empty", ErrorCodes::POSITION_OUT_OF_BOUND);

    if (position >= data.size())
        throw Exception("Position " + toString(position)
            + " is out of bound in Block::getByPosition(), max position = "
            + toString(data.size() - 1)
            + ", there are columns: " + dumpNames(), ErrorCodes::POSITION_OUT_OF_BOUND);

    return data[position];
}
/// Bounds-checked read-only access to the column at `position`.
/// Throws Exception(POSITION_OUT_OF_BOUND) if the block is empty or `position` is not a valid index.
const ColumnWithTypeAndName & Block::getByPosition(size_t position) const
{
    /// Checked separately so that `data.size() - 1` below cannot wrap around.
    if (data.empty())
        throw Exception("Block is empty", ErrorCodes::POSITION_OUT_OF_BOUND);

    if (position >= data.size())
        throw Exception("Position " + toString(position)
            + " is out of bound in Block::getByPosition(), max position = "
            + toString(data.size() - 1)
            + ", there are columns: " + dumpNames(), ErrorCodes::POSITION_OUT_OF_BOUND);

    return data[position];
}
/// Look up a column by name through the name index.
/// Throws Exception(NOT_FOUND_COLUMN_IN_BLOCK) if the name is not in the index.
ColumnWithTypeAndName & Block::getByName(const std::string & name)
{
    auto it = index_by_name.find(name);
    if (index_by_name.end() == it)
        throw Exception("Not found column " + name + " in block. There are only columns: " + dumpNames()
            , ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK);

    /// The index stores the column's position inside `data`.
    return data[it->second];
}
/// Read-only lookup of a column by name through the name index.
/// Throws Exception(NOT_FOUND_COLUMN_IN_BLOCK) if the name is not in the index.
const ColumnWithTypeAndName & Block::getByName(const std::string & name) const
{
    auto it = index_by_name.find(name);
    if (index_by_name.end() == it)
        throw Exception("Not found column " + name + " in block. There are only columns: " + dumpNames()
            , ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK);

    /// The index stores the column's position inside `data`.
    return data[it->second];
}
@ -247,12 +234,12 @@ bool Block::has(const std::string & name) const
/// Return the position recorded in the name index for `name`.
/// Throws Exception(NOT_FOUND_COLUMN_IN_BLOCK) if the name is not in the index.
size_t Block::getPositionByName(const std::string & name) const
{
    auto it = index_by_name.find(name);
    if (index_by_name.end() == it)
        throw Exception("Not found column " + name + " in block. There are only columns: " + dumpNames()
            , ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK);

    return it->second;
}
@ -346,7 +333,7 @@ Block Block::sortColumns() const
Block sorted_block;
for (const auto & name : index_by_name)
sorted_block.insert(*name.second);
sorted_block.insert(data[name.second]);
return sorted_block;
}
@ -446,7 +433,6 @@ void Block::clear()
info = BlockInfo();
data.clear();
index_by_name.clear();
index_by_position.clear();
}
void Block::swap(Block & other) noexcept
@ -454,7 +440,6 @@ void Block::swap(Block & other) noexcept
std::swap(info, other.info);
data.swap(other.data);
index_by_name.swap(other.index_by_name);
index_by_position.swap(other.index_by_position);
}
}