2021-04-23 12:53:38 +00:00
|
|
|
#include <Columns/ColumnObject.h>
|
2021-05-06 05:33:06 +00:00
|
|
|
#include <Columns/ColumnSparse.h>
|
|
|
|
#include <Columns/ColumnVector.h>
|
|
|
|
#include <DataTypes/ObjectUtils.h>
|
|
|
|
#include <DataTypes/getLeastSupertype.h>
|
|
|
|
#include <DataTypes/DataTypeNothing.h>
|
|
|
|
#include <Interpreters/castColumn.h>
|
2021-04-23 12:53:38 +00:00
|
|
|
|
2021-05-26 02:41:38 +00:00
|
|
|
#include <Common/FieldVisitors.h>
|
|
|
|
|
2021-04-23 12:53:38 +00:00
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
|
|
|
/// Error codes referenced by this translation unit. These are declarations only;
/// the definitions live elsewhere.
namespace ErrorCodes
{
    extern const int LOGICAL_ERROR;
    extern const int ILLEGAL_COLUMN;
    extern const int DUPLICATE_COLUMN;
    /// Thrown by ColumnObject::cloneResized below; declared here so this file
    /// does not depend on some included header happening to declare it.
    extern const int NOT_IMPLEMENTED;
}
|
|
|
|
|
2021-05-06 00:40:17 +00:00
|
|
|
/// Copy constructor: initializes `data` and `least_common_type` from the
/// corresponding members of `other`.
/// NOTE(review): whether copying `data` shares or duplicates the underlying
/// column depends on the pointer type declared in the header — confirm there.
ColumnObject::Subcolumn::Subcolumn(const Subcolumn & other)
    : data(other.data)
    , least_common_type(other.least_common_type)
{
}
|
|
|
|
|
|
|
|
/// Takes ownership of `data_` and derives the initial least common type from
/// the stored column via getDataTypeByColumn.
/// NOTE(review): the second initializer reads `data`, so it relies on `data`
/// being declared before `least_common_type` in the class — confirm in the header.
ColumnObject::Subcolumn::Subcolumn(MutableColumnPtr && data_)
    : data(std::move(data_))
    , least_common_type(getDataTypeByColumn(*data))
{
}
|
|
|
|
|
2021-05-06 05:33:06 +00:00
|
|
|
void ColumnObject::Subcolumn::insert(const Field & field, const DataTypePtr & value_type)
|
2021-04-23 12:53:38 +00:00
|
|
|
{
|
2021-05-06 00:40:17 +00:00
|
|
|
data->insert(field);
|
2021-05-06 05:33:06 +00:00
|
|
|
least_common_type = getLeastSupertype({least_common_type, value_type}, true);
|
2021-05-06 00:40:17 +00:00
|
|
|
}
|
2021-04-23 12:53:38 +00:00
|
|
|
|
2021-05-06 00:40:17 +00:00
|
|
|
void ColumnObject::Subcolumn::insertDefault()
|
|
|
|
{
|
|
|
|
data->insertDefault();
|
|
|
|
}
|
2021-04-23 12:53:38 +00:00
|
|
|
|
2021-05-06 00:40:17 +00:00
|
|
|
/// Builds a ColumnObject that takes over the given map of subcolumns.
/// The map is validated with checkConsistency(), which throws on a null
/// subcolumn or on mismatching subcolumn sizes.
ColumnObject::ColumnObject(SubcolumnsMap && subcolumns_)
    : subcolumns(std::move(subcolumns_))
{
    checkConsistency();
}
|
|
|
|
|
|
|
|
void ColumnObject::checkConsistency() const
|
|
|
|
{
|
|
|
|
if (subcolumns.empty())
|
|
|
|
return;
|
|
|
|
|
2021-05-06 00:40:17 +00:00
|
|
|
size_t first_size = subcolumns.begin()->second.size();
|
2021-04-23 12:53:38 +00:00
|
|
|
for (const auto & [name, column] : subcolumns)
|
|
|
|
{
|
2021-05-06 00:40:17 +00:00
|
|
|
if (!column.data)
|
2021-04-23 12:53:38 +00:00
|
|
|
throw Exception(ErrorCodes::LOGICAL_ERROR, "Null subcolumn passed to ColumnObject");
|
|
|
|
|
2021-05-06 00:40:17 +00:00
|
|
|
if (first_size != column.data->size())
|
2021-04-23 12:53:38 +00:00
|
|
|
{
|
|
|
|
throw Exception(ErrorCodes::LOGICAL_ERROR, "Sizes of subcolumns are inconsistent in ColumnObject."
|
|
|
|
" Subcolumn '{}' has {} rows, subcolumn '{}' has {} rows",
|
2021-05-06 00:40:17 +00:00
|
|
|
subcolumns.begin()->first, first_size, name, column.data->size());
|
2021-04-23 12:53:38 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Only resizing to zero rows is supported: returns a freshly created, empty
/// ColumnObject in that case.
/// Throws Exception(NOT_IMPLEMENTED) for any non-zero `new_size`.
MutableColumnPtr ColumnObject::cloneResized(size_t new_size) const
{
    if (new_size == 0)
        return ColumnObject::create();

    throw Exception(ErrorCodes::NOT_IMPLEMENTED,
        "ColumnObject doesn't support resize to non-zero length");
}
|
|
|
|
|
|
|
|
size_t ColumnObject::byteSize() const
|
|
|
|
{
|
|
|
|
size_t res = 0;
|
|
|
|
for (const auto & [_, column] : subcolumns)
|
2021-05-06 00:40:17 +00:00
|
|
|
res += column.data->byteSize();
|
2021-04-23 12:53:38 +00:00
|
|
|
return res;
|
|
|
|
}
|
|
|
|
|
|
|
|
size_t ColumnObject::allocatedBytes() const
|
|
|
|
{
|
|
|
|
size_t res = 0;
|
|
|
|
for (const auto & [_, column] : subcolumns)
|
2021-05-06 00:40:17 +00:00
|
|
|
res += column.data->allocatedBytes();
|
2021-04-23 12:53:38 +00:00
|
|
|
return res;
|
|
|
|
}
|
|
|
|
|
2021-05-06 00:40:17 +00:00
|
|
|
/// Returns a read-only reference to the subcolumn stored under `key`.
/// Throws Exception(ILLEGAL_COLUMN) if no such subcolumn exists.
const ColumnObject::Subcolumn & ColumnObject::getSubcolumn(const String & key) const
{
    const auto found = subcolumns.find(key);
    if (found == subcolumns.end())
        throw Exception(ErrorCodes::ILLEGAL_COLUMN, "There is no subcolumn {} in ColumnObject", key);

    return found->second;
}
|
|
|
|
|
2021-05-06 00:40:17 +00:00
|
|
|
/// Returns a mutable reference to the subcolumn stored under `key`.
/// Throws Exception(ILLEGAL_COLUMN) if no such subcolumn exists.
ColumnObject::Subcolumn & ColumnObject::getSubcolumn(const String & key)
{
    const auto found = subcolumns.find(key);
    if (found == subcolumns.end())
        throw Exception(ErrorCodes::ILLEGAL_COLUMN, "There is no subcolumn {} in ColumnObject", key);

    return found->second;
}
|
|
|
|
|
|
|
|
bool ColumnObject::hasSubcolumn(const String & key) const
|
|
|
|
{
|
|
|
|
return subcolumns.count(key) != 0;
|
|
|
|
}
|
|
|
|
|
2021-05-06 05:33:06 +00:00
|
|
|
void ColumnObject::addSubcolumn(const String & key, const ColumnPtr & column_sample, size_t new_size, bool check_size)
|
2021-04-23 12:53:38 +00:00
|
|
|
{
|
2021-05-06 00:40:17 +00:00
|
|
|
if (subcolumns.count(key))
|
|
|
|
throw Exception(ErrorCodes::DUPLICATE_COLUMN, "Subcolumn '{}' already exists", key);
|
|
|
|
|
|
|
|
if (!column_sample->empty())
|
|
|
|
throw Exception(ErrorCodes::LOGICAL_ERROR,
|
|
|
|
"Cannot add subcolumn '{}' with non-empty sample column", key);
|
|
|
|
|
|
|
|
if (check_size && new_size != size())
|
|
|
|
throw Exception(ErrorCodes::LOGICAL_ERROR,
|
|
|
|
"Cannot add subcolumn '{}' with {} rows to ColumnObject with {} rows",
|
|
|
|
key, new_size, size());
|
|
|
|
|
|
|
|
auto & subcolumn = subcolumns[key];
|
2021-05-06 05:33:06 +00:00
|
|
|
subcolumn.data = column_sample->cloneResized(new_size);
|
|
|
|
subcolumn.least_common_type = std::make_shared<DataTypeNothing>();
|
2021-05-06 00:40:17 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
void ColumnObject::addSubcolumn(const String & key, Subcolumn && subcolumn, bool check_size)
|
|
|
|
{
|
|
|
|
if (subcolumns.count(key))
|
|
|
|
throw Exception(ErrorCodes::DUPLICATE_COLUMN, "Subcolumn '{}' already exists", key);
|
|
|
|
|
|
|
|
if (check_size && subcolumn.size() != size())
|
2021-04-23 12:53:38 +00:00
|
|
|
throw Exception(ErrorCodes::LOGICAL_ERROR,
|
|
|
|
"Cannot add subcolumn '{}' with {} rows to ColumnObject with {} rows",
|
2021-05-06 00:40:17 +00:00
|
|
|
key, subcolumn.size(), size());
|
2021-04-23 12:53:38 +00:00
|
|
|
|
|
|
|
subcolumns[key] = std::move(subcolumn);
|
|
|
|
}
|
|
|
|
|
2021-04-23 23:56:26 +00:00
|
|
|
/// Returns the names of all subcolumns, in the iteration order of the subcolumns map.
Names ColumnObject::getKeys() const
{
    Names result;
    result.reserve(subcolumns.size());

    for (const auto & entry : subcolumns)
        result.emplace_back(entry.first);

    return result;
}
|
|
|
|
|
2021-05-06 05:33:06 +00:00
|
|
|
void ColumnObject::optimizeTypesOfSubcolumns()
|
|
|
|
{
|
2021-05-26 02:41:38 +00:00
|
|
|
if (optimized_types_of_subcolumns)
|
2021-05-06 05:33:06 +00:00
|
|
|
return;
|
|
|
|
|
|
|
|
for (auto & [_, subcolumn] : subcolumns)
|
|
|
|
{
|
|
|
|
auto from_type = getDataTypeByColumn(*subcolumn.data);
|
|
|
|
if (subcolumn.least_common_type->equals(*from_type))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
size_t subcolumn_size = subcolumn.size();
|
2021-05-26 02:41:38 +00:00
|
|
|
if (subcolumn.data->getNumberOfDefaultRows(/*step=*/ 1) == 0)
|
2021-05-06 05:33:06 +00:00
|
|
|
{
|
|
|
|
subcolumn.data = castColumn({subcolumn.data, from_type, ""}, subcolumn.least_common_type);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
auto offsets = ColumnUInt64::create();
|
|
|
|
auto & offsets_data = offsets->getData();
|
|
|
|
|
|
|
|
subcolumn.data->getIndicesOfNonDefaultValues(offsets_data, 0, subcolumn_size);
|
2021-05-26 02:41:38 +00:00
|
|
|
|
|
|
|
auto values = subcolumn.data->index(*offsets, offsets->size());
|
|
|
|
values = castColumn({values, from_type, ""}, subcolumn.least_common_type);
|
|
|
|
|
|
|
|
subcolumn.data = values->createWithOffsets(
|
|
|
|
offsets_data, subcolumn.least_common_type->getDefault(), subcolumn_size, /*shift=*/ 0);
|
2021-05-06 05:33:06 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-05-26 02:41:38 +00:00
|
|
|
optimized_types_of_subcolumns = true;
|
2021-05-06 05:33:06 +00:00
|
|
|
}
|
|
|
|
|
2021-04-23 12:53:38 +00:00
|
|
|
}
|