2022-05-31 00:10:47 +00:00
|
|
|
#include <cstddef>
|
2017-07-13 16:49:09 +00:00
|
|
|
#include <Columns/IColumn.h>
|
2017-07-21 06:35:58 +00:00
|
|
|
#include <Columns/ColumnConst.h>
|
2021-03-12 16:33:41 +00:00
|
|
|
#include <Columns/ColumnSparse.h>
|
2017-07-13 16:49:09 +00:00
|
|
|
|
2017-08-07 07:31:16 +00:00
|
|
|
#include <Common/Exception.h>
|
2020-11-10 17:32:00 +00:00
|
|
|
#include <Common/SipHash.h>
|
2017-08-07 07:31:16 +00:00
|
|
|
|
|
|
|
#include <IO/WriteHelpers.h>
|
2020-11-10 17:32:00 +00:00
|
|
|
#include <IO/Operators.h>
|
2017-08-07 07:31:16 +00:00
|
|
|
|
2017-07-13 16:49:09 +00:00
|
|
|
#include <DataTypes/IDataType.h>
|
2019-03-29 20:04:04 +00:00
|
|
|
#include <DataTypes/DataTypeCustom.h>
|
2017-12-25 18:58:39 +00:00
|
|
|
#include <DataTypes/NestedUtils.h>
|
2021-03-09 17:25:23 +00:00
|
|
|
#include <DataTypes/Serializations/SerializationSparse.h>
|
|
|
|
#include <DataTypes/Serializations/SerializationInfo.h>
|
2021-01-01 21:16:13 +00:00
|
|
|
|
2017-07-13 16:49:09 +00:00
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
2017-08-07 07:31:16 +00:00
|
|
|
/// Error codes referenced by the functions in this file (declared here, defined elsewhere).
namespace ErrorCodes
{
    extern const int DATA_TYPE_CANNOT_BE_PROMOTED;
    extern const int ILLEGAL_COLUMN;
    extern const int LOGICAL_ERROR;
}
|
|
|
|
|
2020-03-08 22:38:12 +00:00
|
|
|
/// Out-of-line definition of the destructor; the compiler-generated behaviour is used as is.
IDataType::~IDataType() = default;
|
2018-12-13 13:41:47 +00:00
|
|
|
|
2017-07-13 20:58:19 +00:00
|
|
|
void IDataType::updateAvgValueSizeHint(const IColumn & column, double & avg_value_size_hint)
|
2017-07-13 16:49:09 +00:00
|
|
|
{
|
|
|
|
/// Update the average value size hint if amount of read rows isn't too small
|
|
|
|
size_t column_size = column.size();
|
|
|
|
if (column_size > 10)
|
|
|
|
{
|
|
|
|
double current_avg_value_size = static_cast<double>(column.byteSize()) / column_size;
|
|
|
|
|
|
|
|
/// Heuristic is chosen so that avg_value_size_hint increases rapidly but decreases slowly.
|
|
|
|
if (current_avg_value_size > avg_value_size_hint)
|
|
|
|
avg_value_size_hint = std::min(1024., current_avg_value_size); /// avoid overestimation
|
|
|
|
else if (current_avg_value_size * 2 < avg_value_size_hint)
|
|
|
|
avg_value_size_hint = (current_avg_value_size + avg_value_size_hint * 3) / 4;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-04-06 15:59:03 +00:00
|
|
|
/// Creates a column for this type that matches the kind of `serialization`:
/// the column from createColumn() is wrapped into ColumnSparse when the kind is SPARSE,
/// otherwise it is returned unchanged.
MutableColumnPtr IDataType::createColumn(const ISerialization & serialization) const
{
    auto nested = createColumn();

    const bool is_sparse = serialization.getKind() == ISerialization::Kind::SPARSE;
    if (!is_sparse)
        return nested;

    return ColumnSparse::create(std::move(nested));
}
|
|
|
|
|
2017-12-10 22:44:04 +00:00
|
|
|
/// Creates a fresh column of this type, inserts `field` into it once
/// and hands that column together with `size` to ColumnConst::create.
ColumnPtr IDataType::createColumnConst(size_t size, const Field & field) const
{
    auto single_value_column = createColumn();
    single_value_column->insert(field);

    return ColumnConst::create(std::move(single_value_column), size);
}
|
|
|
|
|
2017-08-07 07:31:16 +00:00
|
|
|
|
2017-12-18 04:07:26 +00:00
|
|
|
/// Same as createColumnConst(), with the value taken from getDefault().
ColumnPtr IDataType::createColumnConstWithDefaultValue(size_t size) const
{
    const auto default_value = getDefault();
    return createColumnConst(size, default_value);
}
|
|
|
|
|
2019-01-25 14:16:23 +00:00
|
|
|
/// This implementation never returns a type: it always throws
/// DATA_TYPE_CANNOT_BE_PROMOTED, mentioning the name of this type.
DataTypePtr IDataType::promoteNumericType() const
{
    throw Exception(
        ErrorCodes::DATA_TYPE_CANNOT_BE_PROMOTED,
        "Data type {} can't be promoted.",
        getName());
}
|
2017-12-18 04:07:26 +00:00
|
|
|
|
2021-03-09 14:10:28 +00:00
|
|
|
size_t IDataType::getSizeOfValueInMemory() const
|
2017-08-07 07:31:16 +00:00
|
|
|
{
|
2023-01-23 21:13:58 +00:00
|
|
|
throw Exception(ErrorCodes::LOGICAL_ERROR, "Value of type {} in memory is not of fixed size.", getName());
|
2017-08-07 07:31:16 +00:00
|
|
|
}
|
|
|
|
|
2021-10-11 22:01:00 +00:00
|
|
|
void IDataType::forEachSubcolumn(
|
|
|
|
const SubcolumnCallback & callback,
|
2021-10-29 17:21:02 +00:00
|
|
|
const SubstreamData & data)
|
2021-10-11 22:01:00 +00:00
|
|
|
{
|
|
|
|
ISerialization::StreamCallback callback_with_data = [&](const auto & subpath)
|
|
|
|
{
|
|
|
|
for (size_t i = 0; i < subpath.size(); ++i)
|
|
|
|
{
|
2022-06-17 01:10:52 +00:00
|
|
|
size_t prefix_len = i + 1;
|
|
|
|
if (!subpath[i].visited && ISerialization::hasSubcolumnForPath(subpath, prefix_len))
|
2021-10-11 22:01:00 +00:00
|
|
|
{
|
2022-06-17 01:10:52 +00:00
|
|
|
auto name = ISerialization::getSubcolumnNameForStream(subpath, prefix_len);
|
|
|
|
auto subdata = ISerialization::createFromPath(subpath, prefix_len);
|
2021-10-29 17:21:02 +00:00
|
|
|
callback(subpath, name, subdata);
|
2021-10-11 22:01:00 +00:00
|
|
|
}
|
|
|
|
subpath[i].visited = true;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
2022-06-17 01:10:52 +00:00
|
|
|
ISerialization::EnumerateStreamsSettings settings;
|
|
|
|
settings.position_independent_encoding = false;
|
|
|
|
data.serialization->enumerateStreams(settings, callback_with_data, data);
|
2021-10-11 22:01:00 +00:00
|
|
|
}
|
|
|
|
|
2021-10-29 17:21:02 +00:00
|
|
|
template <typename Ptr>
|
|
|
|
Ptr IDataType::getForSubcolumn(
|
|
|
|
const String & subcolumn_name,
|
|
|
|
const SubstreamData & data,
|
|
|
|
Ptr SubstreamData::*member,
|
|
|
|
bool throw_if_null) const
|
2017-08-07 07:31:16 +00:00
|
|
|
{
|
2021-10-29 17:21:02 +00:00
|
|
|
Ptr res;
|
|
|
|
forEachSubcolumn([&](const auto &, const auto & name, const auto & subdata)
|
2021-10-11 22:01:00 +00:00
|
|
|
{
|
|
|
|
if (name == subcolumn_name)
|
2021-10-29 17:21:02 +00:00
|
|
|
res = subdata.*member;
|
|
|
|
}, data);
|
|
|
|
|
|
|
|
if (!res && throw_if_null)
|
|
|
|
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "There is no subcolumn {} in type {}", subcolumn_name, getName());
|
2017-08-07 07:31:16 +00:00
|
|
|
|
2021-10-11 22:01:00 +00:00
|
|
|
return res;
|
2017-12-09 06:32:22 +00:00
|
|
|
}
|
|
|
|
|
2022-06-17 01:10:52 +00:00
|
|
|
bool IDataType::hasSubcolumn(const String & subcolumn_name) const
|
|
|
|
{
|
|
|
|
return tryGetSubcolumnType(subcolumn_name) != nullptr;
|
|
|
|
}
|
|
|
|
|
2021-10-29 17:21:02 +00:00
|
|
|
/// Returns the type of the subcolumn `subcolumn_name`, resolved against the default serialization
/// of this type, or a null pointer if there is no such subcolumn.
DataTypePtr IDataType::tryGetSubcolumnType(const String & subcolumn_name) const
{
    const SubstreamData root = SubstreamData(getDefaultSerialization()).withType(getPtr());

    return getForSubcolumn<DataTypePtr>(
        subcolumn_name,
        root,
        &SubstreamData::type,
        /* throw_if_null = */ false);
}
|
2020-10-14 17:47:14 +00:00
|
|
|
|
2021-10-29 17:21:02 +00:00
|
|
|
/// Returns the type of the subcolumn `subcolumn_name`, resolved against the default serialization
/// of this type. Throws ILLEGAL_COLUMN (via getForSubcolumn) if there is no such subcolumn.
DataTypePtr IDataType::getSubcolumnType(const String & subcolumn_name) const
{
    const SubstreamData root = SubstreamData(getDefaultSerialization()).withType(getPtr());

    return getForSubcolumn<DataTypePtr>(
        subcolumn_name,
        root,
        &SubstreamData::type,
        /* throw_if_null = */ true);
}
|
|
|
|
|
2022-02-09 00:18:53 +00:00
|
|
|
/// Extracts the subcolumn `subcolumn_name` from `column`, using the default serialization
/// of this type. Returns a null pointer if there is no such subcolumn.
ColumnPtr IDataType::tryGetSubcolumn(const String & subcolumn_name, const ColumnPtr & column) const
{
    const SubstreamData root = SubstreamData(getDefaultSerialization()).withColumn(column);

    return getForSubcolumn<ColumnPtr>(
        subcolumn_name,
        root,
        &SubstreamData::column,
        /* throw_if_null = */ false);
}
|
|
|
|
|
2021-10-11 22:01:00 +00:00
|
|
|
/// Extracts the subcolumn `subcolumn_name` from `column`, using the default serialization
/// of this type. Throws ILLEGAL_COLUMN (via getForSubcolumn) if there is no such subcolumn.
ColumnPtr IDataType::getSubcolumn(const String & subcolumn_name, const ColumnPtr & column) const
{
    const SubstreamData root = SubstreamData(getDefaultSerialization()).withColumn(column);

    return getForSubcolumn<ColumnPtr>(
        subcolumn_name,
        root,
        &SubstreamData::column,
        /* throw_if_null = */ true);
}
|
|
|
|
|
|
|
|
/// Returns the serialization of the subcolumn `subcolumn_name`, derived from the given
/// `serialization` of the whole column. Throws ILLEGAL_COLUMN (via getForSubcolumn)
/// if there is no such subcolumn.
SerializationPtr IDataType::getSubcolumnSerialization(const String & subcolumn_name, const SerializationPtr & serialization) const
{
    const SubstreamData root(serialization);

    return getForSubcolumn<SerializationPtr>(
        subcolumn_name,
        root,
        &SubstreamData::serialization,
        /* throw_if_null = */ true);
}
|
|
|
|
|
|
|
|
/// Collects the names of all subcolumns reported by forEachSubcolumn() for the default
/// serialization of this type, in the order they are reported.
Names IDataType::getSubcolumnNames() const
{
    Names collected;

    const auto append_name = [&collected](const auto &, const auto & subcolumn_name, const auto &)
    {
        collected.push_back(subcolumn_name);
    };
    forEachSubcolumn(append_name, SubstreamData(getDefaultSerialization()));

    return collected;
}
|
|
|
|
|
2017-09-04 01:11:00 +00:00
|
|
|
/// Appends one default value to `column` by delegating to IColumn::insertDefault().
void IDataType::insertDefaultInto(IColumn & column) const
{
    column.insertDefault();
}
|
2017-09-04 14:04:35 +00:00
|
|
|
|
2022-05-31 00:10:47 +00:00
|
|
|
/// Appends `n` default values to `column`, one insertDefaultInto() call per value.
void IDataType::insertManyDefaultsInto(IColumn & column, size_t n) const
{
    for (size_t remaining = n; remaining > 0; --remaining)
        insertDefaultInto(column);
}
|
|
|
|
|
2021-03-09 14:10:28 +00:00
|
|
|
/// Takes over the custom name and/or custom serialization from `custom_desc_`.
/// A part that is null in the descriptor leaves the corresponding current value untouched.
/// NOTE: `custom_desc_` itself is dereferenced unconditionally, so it is assumed to be non-null.
void IDataType::setCustomization(DataTypeCustomDescPtr custom_desc_) const
{
    auto & desc = *custom_desc_;

    if (desc.name)
        custom_name = std::move(desc.name);

    if (desc.serialization)
        custom_serialization = std::move(desc.serialization);
}
|
|
|
|
|
2022-07-21 14:47:19 +00:00
|
|
|
/// Creates a new SerializationInfo with the DEFAULT kind and the given `settings`.
MutableSerializationInfoPtr IDataType::createSerializationInfo(const SerializationInfo::Settings & settings) const
{
    constexpr auto initial_kind = ISerialization::Kind::DEFAULT;
    return std::make_shared<SerializationInfo>(initial_kind, settings);
}
|
|
|
|
|
2022-07-21 14:47:19 +00:00
|
|
|
/// Builds a SerializationInfo describing `column`.
/// A ColumnConst is unwrapped first: the info is computed for its data column instead.
/// Otherwise the kind is taken from ISerialization::getKind(column) and default-constructed
/// settings are used.
SerializationInfoPtr IDataType::getSerializationInfo(const IColumn & column) const
{
    const auto * as_const = checkAndGetColumn<ColumnConst>(&column);
    if (as_const != nullptr)
        return getSerializationInfo(as_const->getDataColumn());

    const auto kind = ISerialization::getKind(column);
    return std::make_shared<SerializationInfo>(kind, SerializationInfo::Settings{});
}
|
|
|
|
|
2021-03-09 14:10:28 +00:00
|
|
|
/// Returns the custom serialization if one has been set,
/// otherwise the result of doGetDefaultSerialization().
SerializationPtr IDataType::getDefaultSerialization() const
{
    if (!custom_serialization)
        return doGetDefaultSerialization();

    return custom_serialization;
}
|
|
|
|
|
2021-03-09 17:25:23 +00:00
|
|
|
/// Returns a SerializationSparse constructed on top of the default serialization of this type.
SerializationPtr IDataType::getSparseSerialization() const
{
    auto nested = getDefaultSerialization();
    return std::make_shared<SerializationSparse>(std::move(nested));
}
|
|
|
|
|
2021-10-04 15:21:38 +00:00
|
|
|
/// Returns the serialization for the requested `kind`.
/// The sparse serialization is returned only when the type supports it and `kind` is SPARSE;
/// in every other case the default serialization is returned.
SerializationPtr IDataType::getSerialization(ISerialization::Kind kind) const
{
    const bool use_sparse = supportsSparseSerialization() && kind == ISerialization::Kind::SPARSE;
    return use_sparse ? getSparseSerialization() : getDefaultSerialization();
}
|
|
|
|
|
2021-10-29 17:21:02 +00:00
|
|
|
/// Returns the serialization that corresponds to the kind stored in `info`.
SerializationPtr IDataType::getSerialization(const SerializationInfo & info) const
{
    const auto kind = info.getKind();
    return getSerialization(kind);
}
|
|
|
|
|
2021-03-09 14:10:28 +00:00
|
|
|
// static
|
2021-05-19 01:48:46 +00:00
|
|
|
SerializationPtr IDataType::getSerialization(const NameAndTypePair & column, const SerializationInfo & info)
|
2018-12-13 13:41:47 +00:00
|
|
|
{
|
2021-03-09 14:10:28 +00:00
|
|
|
if (column.isSubcolumn())
|
|
|
|
{
|
2021-09-18 19:31:30 +00:00
|
|
|
const auto & type_in_storage = column.getTypeInStorage();
|
2021-10-29 17:21:02 +00:00
|
|
|
auto serialization = type_in_storage->getSerialization(info);
|
|
|
|
return type_in_storage->getSubcolumnSerialization(column.getSubcolumnName(), serialization);
|
2021-03-09 14:10:28 +00:00
|
|
|
}
|
2018-12-13 13:41:47 +00:00
|
|
|
|
2021-10-29 17:21:02 +00:00
|
|
|
return column.type->getSerialization(info);
|
2018-12-13 13:41:47 +00:00
|
|
|
}
|
|
|
|
|
2021-11-01 02:13:07 +00:00
|
|
|
// static
|
|
|
|
SerializationPtr IDataType::getSerialization(const NameAndTypePair & column)
|
|
|
|
{
|
|
|
|
if (column.isSubcolumn())
|
|
|
|
{
|
|
|
|
const auto & type_in_storage = column.getTypeInStorage();
|
|
|
|
auto serialization = type_in_storage->getDefaultSerialization();
|
|
|
|
return type_in_storage->getSubcolumnSerialization(column.getSubcolumnName(), serialization);
|
|
|
|
}
|
|
|
|
|
|
|
|
return column.type->getDefaultSerialization();
|
|
|
|
}
|
|
|
|
|
2017-09-04 14:04:35 +00:00
|
|
|
}
|