mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-27 01:51:59 +00:00
Blocks simplification [#METR-22173].
This commit is contained in:
parent
ca044bf0de
commit
093353d0ad
@ -2,14 +2,13 @@
|
||||
|
||||
#include <vector>
|
||||
#include <map>
|
||||
#include <list>
|
||||
#include <initializer_list>
|
||||
|
||||
#include <DB/Core/BlockInfo.h>
|
||||
#include <DB/Core/ColumnWithTypeAndName.h>
|
||||
#include <DB/Core/NamesAndTypes.h>
|
||||
#include <DB/Common/Exception.h>
|
||||
#include <DB/Core/ColumnWithTypeAndName.h>
|
||||
#include <DB/Core/ColumnsWithTypeAndName.h>
|
||||
#include <DB/Common/Exception.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -24,15 +23,12 @@ class Context;
|
||||
|
||||
class Block
|
||||
{
|
||||
public:
|
||||
using Container_t = std::list<ColumnWithTypeAndName>;
|
||||
using IndexByPosition_t = std::vector<Container_t::iterator>;
|
||||
using IndexByName_t = std::map<String, Container_t::iterator>;
|
||||
|
||||
private:
|
||||
Container_t data;
|
||||
IndexByPosition_t index_by_position;
|
||||
IndexByName_t index_by_name;
|
||||
using Container = std::vector<ColumnWithTypeAndName>;
|
||||
using IndexByName = std::map<String, size_t>;
|
||||
|
||||
Container data;
|
||||
IndexByName index_by_name;
|
||||
|
||||
public:
|
||||
BlockInfo info;
|
||||
@ -40,26 +36,23 @@ public:
|
||||
Block() = default;
|
||||
Block(std::initializer_list<ColumnWithTypeAndName> il) : data{il}
|
||||
{
|
||||
index_by_position.reserve(il.size());
|
||||
for (auto it = std::begin(data); it != std::end(data); ++it)
|
||||
size_t i = 0;
|
||||
for (const auto & elem : il)
|
||||
{
|
||||
index_by_name[it->name] = it;
|
||||
index_by_position.push_back(it);
|
||||
index_by_name[elem.name] = i;
|
||||
++i;
|
||||
}
|
||||
}
|
||||
|
||||
/// нужны, чтобы правильно скопировались индексы
|
||||
Block(const Block & other);
|
||||
Block(Block && other) = default;
|
||||
Block & operator= (const Block & other);
|
||||
Block & operator= (Block && other) = default;
|
||||
|
||||
/// вставить столбец в заданную позицию
|
||||
void insert(size_t position, const ColumnWithTypeAndName & elem);
|
||||
void insert(size_t position, ColumnWithTypeAndName && elem);
|
||||
/// вставить столбец в конец
|
||||
void insert(const ColumnWithTypeAndName & elem);
|
||||
void insert(ColumnWithTypeAndName && elem);
|
||||
/// вставить столбец в конец, если столбца с таким именем ещё нет
|
||||
void insertUnique(const ColumnWithTypeAndName & elem);
|
||||
void insertUnique(ColumnWithTypeAndName && elem);
|
||||
/// удалить столбец в заданной позиции
|
||||
void erase(size_t position);
|
||||
/// удалить столбец с заданным именем
|
||||
@ -67,11 +60,13 @@ public:
|
||||
/// Добавляет в блок недостающие столбцы со значениями по-умолчанию
|
||||
void addDefaults(const NamesAndTypesList & required_columns);
|
||||
|
||||
/// References are invalidated after calling functions above.
|
||||
|
||||
ColumnWithTypeAndName & getByPosition(size_t position);
|
||||
const ColumnWithTypeAndName & getByPosition(size_t position) const;
|
||||
|
||||
ColumnWithTypeAndName & unsafeGetByPosition(size_t position) { return *index_by_position[position]; }
|
||||
const ColumnWithTypeAndName & unsafeGetByPosition(size_t position) const { return *index_by_position[position]; }
|
||||
ColumnWithTypeAndName & unsafeGetByPosition(size_t position) { return data[position]; }
|
||||
const ColumnWithTypeAndName & unsafeGetByPosition(size_t position) const { return data[position]; }
|
||||
|
||||
ColumnWithTypeAndName & getByName(const std::string & name);
|
||||
const ColumnWithTypeAndName & getByName(const std::string & name) const;
|
||||
@ -92,13 +87,13 @@ public:
|
||||
*/
|
||||
size_t rowsInFirstColumn() const;
|
||||
|
||||
size_t columns() const { return index_by_position.size(); }
|
||||
size_t columns() const { return data.size(); }
|
||||
|
||||
/// Приблизительное количество байт в оперативке - для профайлинга.
|
||||
size_t bytes() const;
|
||||
|
||||
operator bool() const { return !index_by_position.empty(); }
|
||||
bool operator!() const { return index_by_position.empty(); }
|
||||
operator bool() const { return !data.empty(); }
|
||||
bool operator!() const { return data.empty(); }
|
||||
|
||||
/** Получить список имён столбцов через запятую. */
|
||||
std::string dumpNames() const;
|
||||
@ -121,6 +116,9 @@ public:
|
||||
|
||||
void clear();
|
||||
void swap(Block & other) noexcept;
|
||||
|
||||
private:
|
||||
void eraseImpl(size_t position);
|
||||
};
|
||||
|
||||
using Blocks = std::vector<Block>;
|
||||
|
@ -28,12 +28,6 @@ namespace ErrorCodes
|
||||
}
|
||||
|
||||
|
||||
/// Copy constructor: performs the copy by delegating to the copy assignment operator.
Block::Block(const Block & other)
|
||||
{
|
||||
*this = other;
|
||||
}
|
||||
|
||||
|
||||
void Block::addDefaults(const NamesAndTypesList & required_columns)
|
||||
{
|
||||
/// Для недостающих столбцов из вложенной структуры нужно создавать не столбец пустых массивов, а столбец массивов правильных длин.
|
||||
@ -90,52 +84,38 @@ void Block::addDefaults(const NamesAndTypesList & required_columns)
|
||||
}
|
||||
}
|
||||
|
||||
/** Copy assignment.
  * Copies `info` and `data` from `other`, then rebuilds `index_by_position` and `index_by_name`
  * by walking this block's own `data`, so every stored iterator refers to an element of this block.
  * Returns a reference to this block.
  */
Block & Block::operator= (const Block & other)
|
||||
{
|
||||
info = other.info;
|
||||
data = other.data;
|
||||
|
||||
index_by_position.resize(data.size());
|
||||
index_by_name.clear();
|
||||
|
||||
size_t pos = 0;
|
||||
/// Walk the freshly copied container and register each element under its position and its name.
for (Container_t::iterator it = data.begin(); it != data.end(); ++it, ++pos)
|
||||
{
|
||||
index_by_position[pos] = it;
|
||||
index_by_name[it->name] = it;
|
||||
}
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
void Block::insert(size_t position, const ColumnWithTypeAndName & elem)
|
||||
{
|
||||
if (position > index_by_position.size())
|
||||
if (position > data.size())
|
||||
throw Exception("Position out of bound in Block::insert(), max position = "
|
||||
+ toString(index_by_position.size()), ErrorCodes::POSITION_OUT_OF_BOUND);
|
||||
+ toString(data.size()), ErrorCodes::POSITION_OUT_OF_BOUND);
|
||||
|
||||
if (position == index_by_position.size())
|
||||
{
|
||||
insert(elem);
|
||||
return;
|
||||
}
|
||||
index_by_name[elem.name] = position;
|
||||
data.emplace(data.begin() + position, elem);
|
||||
}
|
||||
|
||||
Container_t::iterator it = data.insert(index_by_position[position], elem);
|
||||
index_by_name[elem.name] = it;
|
||||
void Block::insert(size_t position, ColumnWithTypeAndName && elem)
|
||||
{
|
||||
if (position > data.size())
|
||||
throw Exception("Position out of bound in Block::insert(), max position = "
|
||||
+ toString(data.size()), ErrorCodes::POSITION_OUT_OF_BOUND);
|
||||
|
||||
index_by_position.resize(index_by_position.size() + 1);
|
||||
for (size_t i = index_by_position.size() - 1; i > position; --i)
|
||||
index_by_position[i] = index_by_position[i - 1];
|
||||
|
||||
index_by_position[position] = it;
|
||||
index_by_name[elem.name] = position;
|
||||
data.emplace(data.begin() + position, std::move(elem));
|
||||
}
|
||||
|
||||
|
||||
void Block::insert(const ColumnWithTypeAndName & elem)
|
||||
{
|
||||
Container_t::iterator it = data.insert(data.end(), elem);
|
||||
index_by_name[elem.name] = it;
|
||||
index_by_position.push_back(it);
|
||||
index_by_name[elem.name] = data.size();
|
||||
data.emplace_back(elem);
|
||||
}
|
||||
|
||||
/// Move `elem` into the block as the last column and remember its position under its name.
void Block::insert(ColumnWithTypeAndName && elem)
{
	/// The index entry is written first, while `elem.name` is still intact.
	const size_t new_position = data.size();
	index_by_name[elem.name] = new_position;
	data.emplace_back(std::move(elem));
}
|
||||
|
||||
|
||||
@ -145,97 +125,104 @@ void Block::insertUnique(const ColumnWithTypeAndName & elem)
|
||||
insert(elem);
|
||||
}
|
||||
|
||||
/// Move `elem` to the end of the block unless a column with the same name is already indexed; otherwise do nothing.
void Block::insertUnique(ColumnWithTypeAndName && elem)
{
	if (index_by_name.count(elem.name) != 0)
		return;

	insert(std::move(elem));
}
|
||||
|
||||
|
||||
void Block::erase(size_t position)
|
||||
{
|
||||
if (index_by_position.empty())
|
||||
if (data.empty())
|
||||
throw Exception("Block is empty", ErrorCodes::POSITION_OUT_OF_BOUND);
|
||||
|
||||
if (position >= index_by_position.size())
|
||||
if (position >= data.size())
|
||||
throw Exception("Position out of bound in Block::erase(), max position = "
|
||||
+ toString(index_by_position.size() - 1), ErrorCodes::POSITION_OUT_OF_BOUND);
|
||||
+ toString(data.size() - 1), ErrorCodes::POSITION_OUT_OF_BOUND);
|
||||
|
||||
Container_t::iterator it = index_by_position[position];
|
||||
auto index_by_name_it = index_by_name.find(it->name);
|
||||
if (index_by_name.end() != index_by_name_it)
|
||||
index_by_name.erase(index_by_name_it);
|
||||
data.erase(it);
|
||||
eraseImpl(position);
|
||||
}
|
||||
|
||||
for (size_t i = position, size = index_by_position.size() - 1; i < size; ++i)
|
||||
index_by_position[i] = index_by_position[i + 1];
|
||||
|
||||
index_by_position.resize(index_by_position.size() - 1);
|
||||
void Block::eraseImpl(size_t position)
|
||||
{
|
||||
data.erase(data.begin() + position);
|
||||
|
||||
for (auto it = index_by_name.begin(); it != index_by_name.end();)
|
||||
{
|
||||
if (it->second == position)
|
||||
index_by_name.erase(it++);
|
||||
else
|
||||
{
|
||||
if (it->second > position)
|
||||
--it->second;
|
||||
++it;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void Block::erase(const String & name)
|
||||
{
|
||||
IndexByName_t::iterator index_it = index_by_name.find(name);
|
||||
auto index_it = index_by_name.find(name);
|
||||
if (index_it == index_by_name.end())
|
||||
throw Exception("No such name in Block::erase(): '"
|
||||
+ name + "'", ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK);
|
||||
|
||||
Container_t::iterator it = index_it->second;
|
||||
index_by_name.erase(index_it);
|
||||
size_t position = std::distance(data.begin(), it);
|
||||
data.erase(it);
|
||||
|
||||
for (size_t i = position, size = index_by_position.size() - 1; i < size; ++i)
|
||||
index_by_position[i] = index_by_position[i + 1];
|
||||
|
||||
index_by_position.resize(index_by_position.size() - 1);
|
||||
eraseImpl(index_it->second);
|
||||
}
|
||||
|
||||
|
||||
/** Checked access to the column at index `position`.
  * Throws Exception with code POSITION_OUT_OF_BOUND if the block has no columns
  * or if `position` is not a valid column index.
  */
ColumnWithTypeAndName & Block::getByPosition(size_t position)
{
	if (position < data.size())
		return data[position];

	/// Only failures reach this point: an empty block gets its own message.
	if (data.empty())
		throw Exception("Block is empty", ErrorCodes::POSITION_OUT_OF_BOUND);

	throw Exception("Position " + toString(position)
		+ " is out of bound in Block::getByPosition(), max position = "
		+ toString(data.size() - 1)
		+ ", there are columns: " + dumpNames(), ErrorCodes::POSITION_OUT_OF_BOUND);
}
|
||||
|
||||
|
||||
/** Checked read-only access to the column at index `position`.
  * Throws Exception with code POSITION_OUT_OF_BOUND if the block has no columns
  * or if `position` is not a valid column index.
  */
const ColumnWithTypeAndName & Block::getByPosition(size_t position) const
{
	if (position < data.size())
		return data[position];

	/// Only failures reach this point: an empty block gets its own message.
	if (data.empty())
		throw Exception("Block is empty", ErrorCodes::POSITION_OUT_OF_BOUND);

	throw Exception("Position " + toString(position)
		+ " is out of bound in Block::getByPosition(), max position = "
		+ toString(data.size() - 1)
		+ ", there are columns: " + dumpNames(), ErrorCodes::POSITION_OUT_OF_BOUND);
}
|
||||
|
||||
|
||||
/** Access the column registered under `name`.
  * Throws Exception with code NOT_FOUND_COLUMN_IN_BLOCK if the name is not present in the index.
  */
ColumnWithTypeAndName & Block::getByName(const std::string & name)
{
	const auto found = index_by_name.find(name);

	if (found != index_by_name.end())
		return data[found->second];

	throw Exception("Not found column " + name + " in block. There are only columns: " + dumpNames()
		, ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK);
}
|
||||
|
||||
|
||||
/** Read-only access to the column registered under `name`.
  * Throws Exception with code NOT_FOUND_COLUMN_IN_BLOCK if the name is not present in the index.
  */
const ColumnWithTypeAndName & Block::getByName(const std::string & name) const
{
	const auto found = index_by_name.find(name);

	if (found != index_by_name.end())
		return data[found->second];

	throw Exception("Not found column " + name + " in block. There are only columns: " + dumpNames()
		, ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK);
}
|
||||
|
||||
|
||||
@ -247,12 +234,12 @@ bool Block::has(const std::string & name) const
|
||||
|
||||
size_t Block::getPositionByName(const std::string & name) const
|
||||
{
|
||||
IndexByName_t::const_iterator it = index_by_name.find(name);
|
||||
auto it = index_by_name.find(name);
|
||||
if (index_by_name.end() == it)
|
||||
throw Exception("Not found column " + name + " in block. There are only columns: " + dumpNames()
|
||||
, ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK);
|
||||
|
||||
return std::distance(const_cast<Container_t &>(data).begin(), it->second);
|
||||
return it->second;
|
||||
}
|
||||
|
||||
|
||||
@ -346,7 +333,7 @@ Block Block::sortColumns() const
|
||||
Block sorted_block;
|
||||
|
||||
for (const auto & name : index_by_name)
|
||||
sorted_block.insert(*name.second);
|
||||
sorted_block.insert(data[name.second]);
|
||||
|
||||
return sorted_block;
|
||||
}
|
||||
@ -446,7 +433,6 @@ void Block::clear()
|
||||
info = BlockInfo();
|
||||
data.clear();
|
||||
index_by_name.clear();
|
||||
index_by_position.clear();
|
||||
}
|
||||
|
||||
void Block::swap(Block & other) noexcept
|
||||
@ -454,7 +440,6 @@ void Block::swap(Block & other) noexcept
|
||||
std::swap(info, other.info);
|
||||
data.swap(other.data);
|
||||
index_by_name.swap(other.index_by_name);
|
||||
index_by_position.swap(other.index_by_position);
|
||||
}
|
||||
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user