mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-19 14:11:58 +00:00
fix json type with sparse columns
This commit is contained in:
parent
39d61e9a37
commit
e0d2c8fb37
@ -263,11 +263,6 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
SerializationInfoPtr getSerializationInfo() const override
|
||||
{
|
||||
return data->getSerializationInfo();
|
||||
}
|
||||
|
||||
bool isNullable() const override { return isColumnNullable(*data); }
|
||||
bool onlyNull() const override { return data->isNullAt(0); }
|
||||
bool isNumeric() const override { return data->isNumeric(); }
|
||||
|
@ -561,15 +561,4 @@ void ColumnTuple::getIndicesOfNonDefaultRows(Offsets & indices, size_t from, siz
|
||||
return getIndicesOfNonDefaultRowsImpl<ColumnTuple>(indices, from, limit);
|
||||
}
|
||||
|
||||
SerializationInfoPtr ColumnTuple::getSerializationInfo() const
|
||||
{
|
||||
MutableSerializationInfos infos;
|
||||
infos.reserve(columns.size());
|
||||
|
||||
for (const auto & column : columns)
|
||||
infos.push_back(const_pointer_cast<SerializationInfo>(column->getSerializationInfo()));
|
||||
|
||||
return std::make_shared<SerializationInfoTuple>(std::move(infos), SerializationInfo::Settings{});
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -102,7 +102,6 @@ public:
|
||||
ColumnPtr compress() const override;
|
||||
double getRatioOfDefaultRows(double sample_ratio) const override;
|
||||
void getIndicesOfNonDefaultRows(Offsets & indices, size_t from, size_t limit) const override;
|
||||
SerializationInfoPtr getSerializationInfo() const override;
|
||||
|
||||
size_t tupleSize() const { return columns.size(); }
|
||||
|
||||
|
@ -64,11 +64,6 @@ ColumnPtr IColumn::createWithOffsets(const Offsets & offsets, const Field & defa
|
||||
return res;
|
||||
}
|
||||
|
||||
SerializationInfoPtr IColumn::getSerializationInfo() const
|
||||
{
|
||||
return std::make_shared<SerializationInfo>(ISerialization::getKind(*this), SerializationInfo::Settings{});
|
||||
}
|
||||
|
||||
bool isColumnNullable(const IColumn & column)
|
||||
{
|
||||
return checkColumn<ColumnNullable>(column);
|
||||
|
@ -35,9 +35,6 @@ class ColumnGathererStream;
|
||||
class Field;
|
||||
class WeakHash32;
|
||||
|
||||
class SerializationInfo;
|
||||
using SerializationInfoPtr = std::shared_ptr<const SerializationInfo>;
|
||||
|
||||
/*
|
||||
* Represents a set of equal ranges in previous column to perform sorting in current column.
|
||||
* Used in sorting by tuples.
|
||||
@ -445,8 +442,6 @@ public:
|
||||
/// Used to create full column from sparse.
|
||||
[[nodiscard]] virtual Ptr createWithOffsets(const Offsets & offsets, const Field & default_field, size_t total_rows, size_t shift) const;
|
||||
|
||||
[[nodiscard]] virtual SerializationInfoPtr getSerializationInfo() const;
|
||||
|
||||
/// Compress column in memory to some representation that allows to decompress it back.
|
||||
/// Return itself if compression is not applicable for this column type.
|
||||
[[nodiscard]] virtual Ptr compress() const
|
||||
|
@ -119,7 +119,6 @@ void DataTypeFactory::registerDataType(const String & family_name, Value creator
|
||||
throw Exception("DataTypeFactory: the data type family name '" + family_name + "' is not unique",
|
||||
ErrorCodes::LOGICAL_ERROR);
|
||||
|
||||
|
||||
if (case_sensitiveness == CaseInsensitive
|
||||
&& !case_insensitive_data_types.emplace(family_name_lowercase, creator).second)
|
||||
throw Exception("DataTypeFactory: the case insensitive data type family name '" + family_name + "' is not unique",
|
||||
|
@ -25,7 +25,7 @@ class DataTypeFactory final : private boost::noncopyable, public IFactoryWithAli
|
||||
private:
|
||||
using SimpleCreator = std::function<DataTypePtr()>;
|
||||
using DataTypesDictionary = std::unordered_map<String, Value>;
|
||||
using CreatorWithCustom = std::function<std::pair<DataTypePtr,DataTypeCustomDescPtr>(const ASTPtr & parameters)>;
|
||||
using CreatorWithCustom = std::function<std::pair<DataTypePtr, DataTypeCustomDescPtr>(const ASTPtr & parameters)>;
|
||||
using SimpleCreatorWithCustom = std::function<std::pair<DataTypePtr,DataTypeCustomDescPtr>()>;
|
||||
|
||||
public:
|
||||
|
@ -2,6 +2,7 @@
|
||||
#include <base/range.h>
|
||||
#include <Common/StringUtils/StringUtils.h>
|
||||
#include <Columns/ColumnTuple.h>
|
||||
#include <Columns/ColumnConst.h>
|
||||
#include <Core/Field.h>
|
||||
#include <DataTypes/DataTypeTuple.h>
|
||||
#include <DataTypes/DataTypeArray.h>
|
||||
@ -257,6 +258,7 @@ size_t DataTypeTuple::getSizeOfValueInMemory() const
|
||||
SerializationPtr DataTypeTuple::doGetDefaultSerialization() const
|
||||
{
|
||||
SerializationTuple::ElementSerializations serializations(elems.size());
|
||||
|
||||
for (size_t i = 0; i < elems.size(); ++i)
|
||||
{
|
||||
String elem_name = have_explicit_names ? names[i] : toString(i + 1);
|
||||
@ -289,7 +291,27 @@ MutableSerializationInfoPtr DataTypeTuple::createSerializationInfo(const Seriali
|
||||
for (const auto & elem : elems)
|
||||
infos.push_back(elem->createSerializationInfo(settings));
|
||||
|
||||
return std::make_shared<SerializationInfoTuple>(std::move(infos), settings);
|
||||
return std::make_shared<SerializationInfoTuple>(std::move(infos), names, settings);
|
||||
}
|
||||
|
||||
/// Builds serialization info for a tuple column by collecting the info of every element column.
/// A ColumnConst is unwrapped first and the info is derived from its nested data column.
/// The resulting info is created with default-constructed settings.
SerializationInfoPtr DataTypeTuple::getSerializationInfo(const IColumn & column) const
{
    const auto * const_wrapper = checkAndGetColumn<ColumnConst>(&column);
    if (const_wrapper)
        return getSerializationInfo(const_wrapper->getDataColumn());

    const size_t num_elems = elems.size();

    MutableSerializationInfos element_infos;
    element_infos.reserve(num_elems);

    const auto & tuple = assert_cast<const ColumnTuple &>(column);
    assert(num_elems == tuple.getColumns().size());

    /// Element infos are returned as pointers to const; the tuple info stores mutable pointers.
    for (size_t pos = 0; pos != num_elems; ++pos)
        element_infos.push_back(const_pointer_cast<SerializationInfo>(elems[pos]->getSerializationInfo(tuple.getColumn(pos))));

    return std::make_shared<SerializationInfoTuple>(std::move(element_infos), names, SerializationInfo::Settings{});
}
|
||||
|
||||
|
||||
|
@ -22,6 +22,7 @@ private:
|
||||
DataTypes elems;
|
||||
Strings names;
|
||||
bool have_explicit_names;
|
||||
|
||||
public:
|
||||
static constexpr bool is_parametric = true;
|
||||
|
||||
@ -54,6 +55,7 @@ public:
|
||||
SerializationPtr doGetDefaultSerialization() const override;
|
||||
SerializationPtr getSerialization(const SerializationInfo & info) const override;
|
||||
MutableSerializationInfoPtr createSerializationInfo(const SerializationInfo::Settings & settings) const override;
|
||||
SerializationInfoPtr getSerializationInfo(const IColumn & column) const override;
|
||||
|
||||
const DataTypePtr & getElement(size_t i) const { return elems[i]; }
|
||||
const DataTypes & getElements() const { return elems; }
|
||||
|
@ -179,12 +179,19 @@ void IDataType::setCustomization(DataTypeCustomDescPtr custom_desc_) const
|
||||
custom_serialization = std::move(custom_desc_->serialization);
|
||||
}
|
||||
|
||||
MutableSerializationInfoPtr IDataType::createSerializationInfo(
|
||||
const SerializationInfo::Settings & settings) const
|
||||
MutableSerializationInfoPtr IDataType::createSerializationInfo(const SerializationInfo::Settings & settings) const
|
||||
{
|
||||
return std::make_shared<SerializationInfo>(ISerialization::Kind::DEFAULT, settings);
|
||||
}
|
||||
|
||||
/// Default implementation: describes the column by the kind reported by ISerialization::getKind,
/// using default-constructed settings. A ColumnConst is unwrapped to its nested data column first.
SerializationInfoPtr IDataType::getSerializationInfo(const IColumn & column) const
{
    const auto * const_wrapper = checkAndGetColumn<ColumnConst>(&column);
    if (const_wrapper != nullptr)
        return getSerializationInfo(const_wrapper->getDataColumn());

    const auto column_kind = ISerialization::getKind(column);
    return std::make_shared<SerializationInfo>(column_kind, SerializationInfo::Settings{});
}
|
||||
|
||||
SerializationPtr IDataType::getDefaultSerialization() const
|
||||
{
|
||||
if (custom_serialization)
|
||||
|
@ -101,8 +101,8 @@ public:
|
||||
|
||||
Names getSubcolumnNames() const;
|
||||
|
||||
virtual MutableSerializationInfoPtr createSerializationInfo(
|
||||
const SerializationInfo::Settings & settings) const;
|
||||
virtual MutableSerializationInfoPtr createSerializationInfo(const SerializationInfo::Settings & settings) const;
|
||||
virtual SerializationInfoPtr getSerializationInfo(const IColumn & column) const;
|
||||
|
||||
/// TODO: support more types.
|
||||
virtual bool supportsSparseSerialization() const { return !haveSubtypes(); }
|
||||
|
@ -1,9 +1,9 @@
|
||||
#include <DataTypes/Serializations/SerializationInfo.h>
|
||||
#include <DataTypes/NestedUtils.h>
|
||||
#include <Columns/ColumnSparse.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <IO/VarInt.h>
|
||||
#include <Core/Block.h>
|
||||
#include <base/EnumReflection.h>
|
||||
|
||||
#include <Poco/JSON/JSON.h>
|
||||
@ -47,12 +47,25 @@ void SerializationInfo::Data::add(const Data & other)
|
||||
num_defaults += other.num_defaults;
|
||||
}
|
||||
|
||||
void SerializationInfo::Data::addDefaults(size_t length)
|
||||
{
|
||||
num_rows += length;
|
||||
num_defaults += length;
|
||||
}
|
||||
|
||||
SerializationInfo::SerializationInfo(ISerialization::Kind kind_, const Settings & settings_)
|
||||
: settings(settings_)
|
||||
, kind(kind_)
|
||||
{
|
||||
}
|
||||
|
||||
SerializationInfo::SerializationInfo(ISerialization::Kind kind_, const Settings & settings_, const Data & data_)
|
||||
: settings(settings_)
|
||||
, kind(kind_)
|
||||
, data(data_)
|
||||
{
|
||||
}
|
||||
|
||||
void SerializationInfo::add(const IColumn & column)
|
||||
{
|
||||
data.add(column);
|
||||
@ -67,6 +80,13 @@ void SerializationInfo::add(const SerializationInfo & other)
|
||||
kind = chooseKind(data, settings);
|
||||
}
|
||||
|
||||
void SerializationInfo::addDefaults(size_t length)
|
||||
{
|
||||
data.addDefaults(length);
|
||||
if (settings.choose_kind)
|
||||
kind = chooseKind(data, settings);
|
||||
}
|
||||
|
||||
void SerializationInfo::replaceData(const SerializationInfo & other)
|
||||
{
|
||||
data = other.data;
|
||||
@ -74,9 +94,7 @@ void SerializationInfo::replaceData(const SerializationInfo & other)
|
||||
|
||||
MutableSerializationInfoPtr SerializationInfo::clone() const
|
||||
{
|
||||
auto res = std::make_shared<SerializationInfo>(kind, settings);
|
||||
res->data = data;
|
||||
return res;
|
||||
return std::make_shared<SerializationInfo>(kind, settings, data);
|
||||
}
|
||||
|
||||
void SerializationInfo::serialializeKindBinary(WriteBuffer & out) const
|
||||
|
@ -34,6 +34,7 @@ public:
|
||||
|
||||
void add(const IColumn & column);
|
||||
void add(const Data & other);
|
||||
void addDefaults(size_t length);
|
||||
};
|
||||
|
||||
struct Settings
|
||||
@ -45,6 +46,7 @@ public:
|
||||
};
|
||||
|
||||
SerializationInfo(ISerialization::Kind kind_, const Settings & settings_);
|
||||
SerializationInfo(ISerialization::Kind kind_, const Settings & settings_, const Data & data_);
|
||||
|
||||
virtual ~SerializationInfo() = default;
|
||||
|
||||
@ -52,7 +54,9 @@ public:
|
||||
|
||||
virtual void add(const IColumn & column);
|
||||
virtual void add(const SerializationInfo & other);
|
||||
virtual void addDefaults(size_t length);
|
||||
virtual void replaceData(const SerializationInfo & other);
|
||||
|
||||
virtual std::shared_ptr<SerializationInfo> clone() const;
|
||||
|
||||
virtual void serialializeKindBinary(WriteBuffer & out) const;
|
||||
@ -61,6 +65,7 @@ public:
|
||||
virtual Poco::JSON::Object toJSON() const;
|
||||
virtual void fromJSON(const Poco::JSON::Object & object);
|
||||
|
||||
void setKind(ISerialization::Kind kind_) { kind = kind_; }
|
||||
const Settings & getSettings() const { return settings; }
|
||||
const Data & getData() const { return data; }
|
||||
ISerialization::Kind getKind() const { return kind; }
|
||||
|
@ -10,13 +10,18 @@ namespace ErrorCodes
|
||||
{
|
||||
extern const int CORRUPTED_DATA;
|
||||
extern const int THERE_IS_NO_COLUMN;
|
||||
extern const int LOGICAL_ERROR;
|
||||
}
|
||||
|
||||
SerializationInfoTuple::SerializationInfoTuple(
|
||||
MutableSerializationInfos elems_, const Settings & settings_)
|
||||
MutableSerializationInfos elems_, Names names_, const Settings & settings_)
|
||||
: SerializationInfo(ISerialization::Kind::DEFAULT, settings_)
|
||||
, elems(std::move(elems_))
|
||||
, names(std::move(names_))
|
||||
{
|
||||
assert(names.size() == elems.size());
|
||||
for (size_t i = 0; i < names.size(); ++i)
|
||||
name_to_elem[names[i]] = elems[i];
|
||||
}
|
||||
|
||||
bool SerializationInfoTuple::hasCustomSerialization() const
|
||||
@ -40,22 +45,34 @@ void SerializationInfoTuple::add(const SerializationInfo & other)
|
||||
{
|
||||
SerializationInfo::add(other);
|
||||
|
||||
const auto & info_tuple = assert_cast<const SerializationInfoTuple &>(other);
|
||||
assert(elems.size() == info_tuple.elems.size());
|
||||
const auto & other_info = assert_cast<const SerializationInfoTuple &>(other);
|
||||
for (const auto & [name, elem] : name_to_elem)
|
||||
{
|
||||
auto it = other_info.name_to_elem.find(name);
|
||||
if (it != other_info.name_to_elem.end())
|
||||
elem->add(*it->second);
|
||||
else
|
||||
elem->addDefaults(other_info.getData().num_rows);
|
||||
}
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < elems.size(); ++i)
|
||||
elems[i]->add(*info_tuple.elems[i]);
|
||||
void SerializationInfoTuple::addDefaults(size_t length)
|
||||
{
|
||||
for (const auto & elem : elems)
|
||||
elem->addDefaults(length);
|
||||
}
|
||||
|
||||
void SerializationInfoTuple::replaceData(const SerializationInfo & other)
|
||||
{
|
||||
SerializationInfo::add(other);
|
||||
|
||||
const auto & info_tuple = assert_cast<const SerializationInfoTuple &>(other);
|
||||
assert(elems.size() == info_tuple.elems.size());
|
||||
|
||||
for (size_t i = 0; i < elems.size(); ++i)
|
||||
elems[i]->replaceData(*info_tuple.elems[i]);
|
||||
const auto & other_info = assert_cast<const SerializationInfoTuple &>(other);
|
||||
for (const auto & [name, elem] : name_to_elem)
|
||||
{
|
||||
auto it = other_info.name_to_elem.find(name);
|
||||
if (it != other_info.name_to_elem.end())
|
||||
elem->replaceData(*it->second);
|
||||
}
|
||||
}
|
||||
|
||||
MutableSerializationInfoPtr SerializationInfoTuple::clone() const
|
||||
@ -65,7 +82,7 @@ MutableSerializationInfoPtr SerializationInfoTuple::clone() const
|
||||
for (const auto & elem : elems)
|
||||
elems_cloned.push_back(elem->clone());
|
||||
|
||||
return std::make_shared<SerializationInfoTuple>(std::move(elems_cloned), settings);
|
||||
return std::make_shared<SerializationInfoTuple>(std::move(elems_cloned), names, settings);
|
||||
}
|
||||
|
||||
void SerializationInfoTuple::serialializeKindBinary(WriteBuffer & out) const
|
||||
@ -99,7 +116,7 @@ void SerializationInfoTuple::fromJSON(const Poco::JSON::Object & object)
|
||||
|
||||
if (!object.has("subcolumns"))
|
||||
throw Exception(ErrorCodes::CORRUPTED_DATA,
|
||||
"Missed field '{}' in SerializationInfo of columns SerializationInfoTuple");
|
||||
"Missed field 'subcolumns' in SerializationInfo of columns SerializationInfoTuple");
|
||||
|
||||
auto subcolumns = object.getArray("subcolumns");
|
||||
if (elems.size() != subcolumns->size())
|
||||
|
@ -1,4 +1,5 @@
|
||||
#pragma once
|
||||
#include <Core/Names.h>
|
||||
#include <DataTypes/Serializations/SerializationInfo.h>
|
||||
|
||||
namespace DB
|
||||
@ -7,25 +8,32 @@ namespace DB
|
||||
class SerializationInfoTuple : public SerializationInfo
|
||||
{
|
||||
public:
|
||||
SerializationInfoTuple(MutableSerializationInfos elems_, const Settings & settings_);
|
||||
SerializationInfoTuple(MutableSerializationInfos elems_, Names names_, const Settings & settings_);
|
||||
|
||||
bool hasCustomSerialization() const override;
|
||||
|
||||
void add(const IColumn & column) override;
|
||||
void add(const SerializationInfo & other) override;
|
||||
void addDefaults(size_t length) override;
|
||||
void replaceData(const SerializationInfo & other) override;
|
||||
|
||||
MutableSerializationInfoPtr clone() const override;
|
||||
|
||||
void serialializeKindBinary(WriteBuffer & out) const override;
|
||||
void deserializeFromKindsBinary(ReadBuffer & in) override;
|
||||
|
||||
Poco::JSON::Object toJSON() const override;
|
||||
void fromJSON(const Poco::JSON::Object & object) override;
|
||||
|
||||
MutableSerializationInfoPtr getElementInfo(size_t i) const { return elems[i]; }
|
||||
const MutableSerializationInfoPtr & getElementInfo(size_t i) const { return elems[i]; }
|
||||
ISerialization::Kind getElementKind(size_t i) const { return elems[i]->getKind(); }
|
||||
|
||||
private:
|
||||
MutableSerializationInfos elems;
|
||||
Names names;
|
||||
|
||||
using NameToElem = std::unordered_map<String, MutableSerializationInfoPtr>;
|
||||
NameToElem name_to_elem;
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -103,7 +103,7 @@ void NativeWriter::write(const Block & block)
|
||||
mark.offset_in_decompressed_block = ostr_concrete->getRemainingBytes();
|
||||
}
|
||||
|
||||
ColumnWithTypeAndName column = block.safeGetByPosition(i);
|
||||
auto column = block.safeGetByPosition(i);
|
||||
|
||||
/// Send data to old clients without low cardinality type.
|
||||
if (remove_low_cardinality || (client_revision && client_revision < DBMS_MIN_REVISION_WITH_LOW_CARDINALITY_TYPE))
|
||||
@ -145,7 +145,7 @@ void NativeWriter::write(const Block & block)
|
||||
SerializationPtr serialization;
|
||||
if (client_revision >= DBMS_MIN_REVISION_WITH_CUSTOM_SERIALIZATION)
|
||||
{
|
||||
auto info = column.column->getSerializationInfo();
|
||||
auto info = column.type->getSerializationInfo(*column.column);
|
||||
serialization = column.type->getSerialization(*info);
|
||||
|
||||
bool has_custom = info->hasCustomSerialization();
|
||||
|
@ -15,6 +15,8 @@
|
||||
#include <Storages/StorageReplicatedMergeTree.h>
|
||||
#include <Storages/MergeTree/PartMetadataManagerOrdinary.h>
|
||||
#include <Storages/MergeTree/PartMetadataManagerWithCache.h>
|
||||
#include <Core/NamesAndTypes.h>
|
||||
#include <Storages/ColumnsDescription.h>
|
||||
#include <Common/StringUtils/StringUtils.h>
|
||||
#include <Common/escapeForFileName.h>
|
||||
#include <Common/CurrentMetrics.h>
|
||||
@ -445,6 +447,18 @@ void IMergeTreeDataPart::setColumns(const NamesAndTypesList & new_columns)
|
||||
|
||||
for (const auto & column : columns)
|
||||
column_name_to_position.emplace(column.name, pos++);
|
||||
|
||||
columns_description = ColumnsDescription(columns);
|
||||
}
|
||||
|
||||
/// Looks up a column or subcolumn of this part by name among all physical columns,
/// delegating to the part's columns description.
NameAndTypePair IMergeTreeDataPart::getColumn(const String & column_name) const
{
    const auto lookup_kind = GetColumnsOptions::AllPhysical;
    return columns_description.getColumnOrSubcolumn(lookup_kind, column_name);
}
|
||||
|
||||
std::optional<NameAndTypePair> IMergeTreeDataPart::tryGetColumn(const String & column_name) const
|
||||
{
|
||||
return columns_description.tryGetColumnOrSubcolumn(GetColumnsOptions::AllPhysical, column_name);
|
||||
}
|
||||
|
||||
void IMergeTreeDataPart::setSerializationInfos(const SerializationInfoByName & new_infos)
|
||||
@ -454,10 +468,15 @@ void IMergeTreeDataPart::setSerializationInfos(const SerializationInfoByName & n
|
||||
|
||||
SerializationPtr IMergeTreeDataPart::getSerialization(const NameAndTypePair & column) const
|
||||
{
|
||||
auto it = serialization_infos.find(column.getNameInStorage());
|
||||
return it == serialization_infos.end()
|
||||
? IDataType::getSerialization(column)
|
||||
: IDataType::getSerialization(column, *it->second);
|
||||
auto column_in_part = tryGetColumn(column.name);
|
||||
if (!column_in_part)
|
||||
return IDataType::getSerialization(column);
|
||||
|
||||
auto it = serialization_infos.find(column_in_part->getNameInStorage());
|
||||
if (it == serialization_infos.end())
|
||||
return IDataType::getSerialization(*column_in_part);
|
||||
|
||||
return IDataType::getSerialization(*column_in_part, *it->second);
|
||||
}
|
||||
|
||||
void IMergeTreeDataPart::removeIfNeeded()
|
||||
@ -564,37 +583,38 @@ size_t IMergeTreeDataPart::getFileSizeOrZero(const String & file_name) const
|
||||
return checksum->second.file_size;
|
||||
}
|
||||
|
||||
String IMergeTreeDataPart::getColumnNameWithMinimumCompressedSize(
|
||||
const StorageSnapshotPtr & storage_snapshot, bool with_subcolumns) const
|
||||
String IMergeTreeDataPart::getColumnNameWithMinimumCompressedSize(bool with_subcolumns) const
|
||||
{
|
||||
auto options = GetColumnsOptions(GetColumnsOptions::AllPhysical).withExtendedObjects();
|
||||
if (with_subcolumns)
|
||||
options.withSubcolumns();
|
||||
auto find_column_with_minimum_size = [&](const auto & columns_list)
|
||||
{
|
||||
std::optional<std::string> minimum_size_column;
|
||||
UInt64 minimum_size = std::numeric_limits<UInt64>::max();
|
||||
|
||||
auto storage_columns = storage_snapshot->getColumns(options);
|
||||
MergeTreeData::AlterConversions alter_conversions;
|
||||
if (!parent_part)
|
||||
alter_conversions = storage.getAlterConversionsForPart(shared_from_this());
|
||||
for (const auto & column : columns_list)
|
||||
{
|
||||
if (!hasColumnFiles(column))
|
||||
continue;
|
||||
|
||||
const auto size = getColumnSize(column.name).data_compressed;
|
||||
if (size < minimum_size)
|
||||
{
|
||||
minimum_size = size;
|
||||
minimum_size_column = column.name;
|
||||
}
|
||||
}
|
||||
|
||||
return minimum_size_column;
|
||||
};
|
||||
|
||||
std::optional<std::string> minimum_size_column;
|
||||
UInt64 minimum_size = std::numeric_limits<UInt64>::max();
|
||||
|
||||
for (const auto & column : storage_columns)
|
||||
if (with_subcolumns)
|
||||
{
|
||||
auto column_name = column.name;
|
||||
auto column_type = column.type;
|
||||
if (alter_conversions.isColumnRenamed(column.name))
|
||||
column_name = alter_conversions.getColumnOldName(column.name);
|
||||
|
||||
if (!hasColumnFiles(column))
|
||||
continue;
|
||||
|
||||
const auto size = getColumnSize(column_name).data_compressed;
|
||||
if (size < minimum_size)
|
||||
{
|
||||
minimum_size = size;
|
||||
minimum_size_column = column_name;
|
||||
}
|
||||
auto options = GetColumnsOptions(GetColumnsOptions::AllPhysical).withSubcolumns();
|
||||
minimum_size_column = find_column_with_minimum_size(columns_description.get(options));
|
||||
}
|
||||
else
|
||||
{
|
||||
minimum_size_column = find_column_with_minimum_size(columns);
|
||||
}
|
||||
|
||||
if (!minimum_size_column)
|
||||
@ -603,22 +623,6 @@ String IMergeTreeDataPart::getColumnNameWithMinimumCompressedSize(
|
||||
return *minimum_size_column;
|
||||
}
|
||||
|
||||
// String IMergeTreeDataPart::getFullPath() const
|
||||
// {
|
||||
// if (relative_path.empty())
|
||||
// throw Exception("Part relative_path cannot be empty. It's bug.", ErrorCodes::LOGICAL_ERROR);
|
||||
|
||||
// return fs::path(storage.getFullPathOnDisk(volume->getDisk())) / (parent_part ? parent_part->relative_path : "") / relative_path / "";
|
||||
// }
|
||||
|
||||
// String IMergeTreeDataPart::getRelativePath() const
|
||||
// {
|
||||
// if (relative_path.empty())
|
||||
// throw Exception("Part relative_path cannot be empty. It's bug.", ErrorCodes::LOGICAL_ERROR);
|
||||
|
||||
// return fs::path(storage.relative_data_path) / (parent_part ? parent_part->relative_path : "") / relative_path / "";
|
||||
// }
|
||||
|
||||
void IMergeTreeDataPart::loadColumnsChecksumsIndexes(bool require_columns_checksums, bool check_consistency)
|
||||
{
|
||||
assertOnDisk();
|
||||
|
@ -14,6 +14,7 @@
|
||||
#include <Storages/MergeTree/MergeTreeDataPartTTLInfo.h>
|
||||
#include <Storages/MergeTree/MergeTreeIOSettings.h>
|
||||
#include <Storages/MergeTree/KeyCondition.h>
|
||||
#include <Storages/ColumnsDescription.h>
|
||||
#include <Interpreters/TransactionVersionMetadata.h>
|
||||
#include <DataTypes/Serializations/SerializationInfo.h>
|
||||
#include <Storages/MergeTree/IPartMetadataManager.h>
|
||||
@ -136,6 +137,9 @@ public:
|
||||
|
||||
const NamesAndTypesList & getColumns() const { return columns; }
|
||||
|
||||
NameAndTypePair getColumn(const String & name) const;
|
||||
std::optional<NameAndTypePair> tryGetColumn(const String & column_name) const;
|
||||
|
||||
void setSerializationInfos(const SerializationInfoByName & new_infos);
|
||||
|
||||
const SerializationInfoByName & getSerializationInfos() const { return serialization_infos; }
|
||||
@ -167,8 +171,7 @@ public:
|
||||
|
||||
/// Returns the name of a column with minimum compressed size (as returned by getColumnSize()).
|
||||
/// If no checksums are present returns the name of the first physically existing column.
|
||||
String getColumnNameWithMinimumCompressedSize(
|
||||
const StorageSnapshotPtr & storage_snapshot, bool with_subcolumns) const;
|
||||
String getColumnNameWithMinimumCompressedSize(bool with_subcolumns) const;
|
||||
|
||||
bool contains(const IMergeTreeDataPart & other) const { return info.contains(other.info); }
|
||||
|
||||
@ -521,6 +524,10 @@ private:
|
||||
/// Map from name of column to its serialization info.
|
||||
SerializationInfoByName serialization_infos;
|
||||
|
||||
/// Columns description for more convenient access
|
||||
/// to columns by name and getting subcolumns.
|
||||
ColumnsDescription columns_description;
|
||||
|
||||
/// Reads part unique identifier (if exists) from uuid.txt
|
||||
void loadUUID();
|
||||
|
||||
|
@ -18,6 +18,7 @@ namespace
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int LOGICAL_ERROR;
|
||||
extern const int THERE_IS_NO_COLUMN;
|
||||
}
|
||||
|
||||
|
||||
@ -33,7 +34,6 @@ IMergeTreeReader::IMergeTreeReader(
|
||||
: data_part(data_part_)
|
||||
, avg_value_size_hints(avg_value_size_hints_)
|
||||
, columns(columns_)
|
||||
, part_columns(data_part->getColumns())
|
||||
, uncompressed_cache(uncompressed_cache_)
|
||||
, mark_cache(mark_cache_)
|
||||
, settings(settings_)
|
||||
@ -47,11 +47,7 @@ IMergeTreeReader::IMergeTreeReader(
|
||||
/// For wide parts convert plain arrays of Nested to subcolumns
|
||||
/// to allow to use shared offset column from cache.
|
||||
columns = Nested::convertToSubcolumns(columns);
|
||||
part_columns = Nested::collect(part_columns);
|
||||
}
|
||||
|
||||
for (const auto & column_from_part : part_columns)
|
||||
columns_from_part[column_from_part.name] = &column_from_part.type;
|
||||
}
|
||||
|
||||
IMergeTreeReader::~IMergeTreeReader() = default;
|
||||
@ -124,37 +120,25 @@ void IMergeTreeReader::evaluateMissingDefaults(Block additional_columns, Columns
|
||||
}
|
||||
}
|
||||
|
||||
NameAndTypePair IMergeTreeReader::getColumnFromPart(const NameAndTypePair & required_column) const
|
||||
String IMergeTreeReader::getColumnNameInPart(const NameAndTypePair & required_column) const
|
||||
{
|
||||
auto name_in_storage = required_column.getNameInStorage();
|
||||
|
||||
ColumnsFromPart::ConstLookupResult it;
|
||||
if (alter_conversions.isColumnRenamed(name_in_storage))
|
||||
{
|
||||
String old_name = alter_conversions.getColumnOldName(name_in_storage);
|
||||
it = columns_from_part.find(old_name);
|
||||
}
|
||||
else
|
||||
{
|
||||
it = columns_from_part.find(name_in_storage);
|
||||
name_in_storage = alter_conversions.getColumnOldName(name_in_storage);
|
||||
return Nested::concatenateName(name_in_storage, required_column.getSubcolumnName());
|
||||
}
|
||||
|
||||
if (it == columns_from_part.end())
|
||||
return required_column;
|
||||
return required_column.name;
|
||||
}
|
||||
|
||||
const DataTypePtr & type = *it->getMapped();
|
||||
if (required_column.isSubcolumn())
|
||||
{
|
||||
auto subcolumn_name = required_column.getSubcolumnName();
|
||||
auto subcolumn_type = type->tryGetSubcolumnType(subcolumn_name);
|
||||
NameAndTypePair IMergeTreeReader::getColumnInPart(const NameAndTypePair & required_column) const
|
||||
{
|
||||
auto column_in_part = data_part->tryGetColumn(getColumnNameInPart(required_column));
|
||||
if (column_in_part)
|
||||
return *column_in_part;
|
||||
|
||||
if (!subcolumn_type)
|
||||
return required_column;
|
||||
|
||||
return {String(it->getKey()), subcolumn_name, type, subcolumn_type};
|
||||
}
|
||||
|
||||
return {String(it->getKey()), type};
|
||||
return required_column;
|
||||
}
|
||||
|
||||
void IMergeTreeReader::performRequiredConversions(Columns & res_columns) const
|
||||
@ -183,7 +167,7 @@ void IMergeTreeReader::performRequiredConversions(Columns & res_columns) const
|
||||
if (res_columns[pos] == nullptr)
|
||||
continue;
|
||||
|
||||
copy_block.insert({res_columns[pos], getColumnFromPart(*name_and_type).type, name_and_type->name});
|
||||
copy_block.insert({res_columns[pos], getColumnInPart(*name_and_type).type, name_and_type->name});
|
||||
}
|
||||
|
||||
DB::performRequiredConversions(copy_block, columns, storage.getContext());
|
||||
|
@ -63,8 +63,10 @@ public:
|
||||
MergeTreeData::DataPartPtr data_part;
|
||||
|
||||
protected:
|
||||
/// Returns actual column type in part, which can differ from table metadata.
|
||||
NameAndTypePair getColumnFromPart(const NameAndTypePair & required_column) const;
|
||||
/// Returns actual column name in part, which can differ from table metadata.
|
||||
String getColumnNameInPart(const NameAndTypePair & required_column) const;
|
||||
/// Returns actual column name and type in part, which can differ from table metadata.
|
||||
NameAndTypePair getColumnInPart(const NameAndTypePair & required_column) const;
|
||||
|
||||
void checkNumberOfColumns(size_t num_columns_to_read) const;
|
||||
|
||||
@ -75,7 +77,6 @@ protected:
|
||||
|
||||
/// Columns that are read.
|
||||
NamesAndTypesList columns;
|
||||
NamesAndTypesList part_columns;
|
||||
|
||||
UncompressedCache * uncompressed_cache;
|
||||
MarkCache * mark_cache;
|
||||
@ -92,11 +93,6 @@ protected:
|
||||
private:
|
||||
/// Alter conversions, which must be applied on fly if required
|
||||
MergeTreeData::AlterConversions alter_conversions;
|
||||
|
||||
/// Actual data type of columns in part
|
||||
|
||||
using ColumnsFromPart = HashMapWithSavedHash<StringRef, const DataTypePtr *, StringRefHash>;
|
||||
ColumnsFromPart columns_from_part;
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -122,7 +122,7 @@ NameSet injectRequiredColumns(
|
||||
*/
|
||||
if (!have_at_least_one_physical_column)
|
||||
{
|
||||
const auto minimum_size_column_name = part->getColumnNameWithMinimumCompressedSize(storage_snapshot, with_subcolumns);
|
||||
const auto minimum_size_column_name = part->getColumnNameWithMinimumCompressedSize(with_subcolumns);
|
||||
columns.push_back(minimum_size_column_name);
|
||||
/// correctly report added column
|
||||
injected_columns.insert(columns.back());
|
||||
|
@ -54,14 +54,14 @@ MergeTreeReaderCompact::MergeTreeReaderCompact(
|
||||
{
|
||||
if (name_and_type->isSubcolumn())
|
||||
{
|
||||
auto storage_column_from_part = getColumnFromPart(
|
||||
auto storage_column_from_part = getColumnInPart(
|
||||
{name_and_type->getNameInStorage(), name_and_type->getTypeInStorage()});
|
||||
|
||||
if (!storage_column_from_part.type->tryGetSubcolumnType(name_and_type->getSubcolumnName()))
|
||||
continue;
|
||||
}
|
||||
|
||||
auto column_from_part = getColumnFromPart(*name_and_type);
|
||||
auto column_from_part = getColumnInPart(*name_and_type);
|
||||
|
||||
auto position = data_part->getColumnPosition(column_from_part.getNameInStorage());
|
||||
if (!position && typeid_cast<const DataTypeArray *>(column_from_part.type.get()))
|
||||
@ -153,7 +153,7 @@ size_t MergeTreeReaderCompact::readRows(
|
||||
if (!column_positions[i])
|
||||
continue;
|
||||
|
||||
auto column_from_part = getColumnFromPart(*column_it);
|
||||
auto column_from_part = getColumnInPart(*column_it);
|
||||
if (res_columns[i] == nullptr)
|
||||
{
|
||||
auto serialization = data_part->getSerialization(column_from_part);
|
||||
@ -168,10 +168,11 @@ size_t MergeTreeReaderCompact::readRows(
|
||||
auto name_and_type = columns.begin();
|
||||
for (size_t pos = 0; pos < num_columns; ++pos, ++name_and_type)
|
||||
{
|
||||
auto column_from_part = getColumnFromPart(*name_and_type);
|
||||
if (!res_columns[pos])
|
||||
continue;
|
||||
|
||||
auto column_from_part = getColumnInPart(*name_and_type);
|
||||
|
||||
try
|
||||
{
|
||||
auto & column = res_columns[pos];
|
||||
|
@ -34,7 +34,7 @@ MergeTreeReaderInMemory::MergeTreeReaderInMemory(
|
||||
{
|
||||
for (const auto & name_and_type : columns)
|
||||
{
|
||||
auto [name, type] = getColumnFromPart(name_and_type);
|
||||
auto [name, type] = getColumnInPart(name_and_type);
|
||||
|
||||
/// If array of Nested column is missing in part,
|
||||
/// we have to read its offsets if they exist.
|
||||
@ -67,7 +67,7 @@ size_t MergeTreeReaderInMemory::readRows(
|
||||
auto column_it = columns.begin();
|
||||
for (size_t i = 0; i < num_columns; ++i, ++column_it)
|
||||
{
|
||||
auto name_type = getColumnFromPart(*column_it);
|
||||
auto name_type = getColumnInPart(*column_it);
|
||||
|
||||
/// Copy offsets, if array of Nested column is missing in part.
|
||||
auto offsets_it = positions_for_offsets.find(name_type.name);
|
||||
|
@ -49,7 +49,7 @@ MergeTreeReaderWide::MergeTreeReaderWide(
|
||||
{
|
||||
for (const NameAndTypePair & column : columns)
|
||||
{
|
||||
auto column_from_part = getColumnFromPart(column);
|
||||
auto column_from_part = getColumnInPart(column);
|
||||
addStreams(column_from_part, profile_callback_, clock_type_);
|
||||
}
|
||||
}
|
||||
@ -83,7 +83,7 @@ size_t MergeTreeReaderWide::readRows(
|
||||
auto name_and_type = columns.begin();
|
||||
for (size_t pos = 0; pos < num_columns; ++pos, ++name_and_type)
|
||||
{
|
||||
auto column_from_part = getColumnFromPart(*name_and_type);
|
||||
auto column_from_part = getColumnInPart(*name_and_type);
|
||||
try
|
||||
{
|
||||
auto & cache = caches[column_from_part.getNameInStorage()];
|
||||
@ -102,7 +102,7 @@ size_t MergeTreeReaderWide::readRows(
|
||||
|
||||
for (size_t pos = 0; pos < num_columns; ++pos, ++name_and_type)
|
||||
{
|
||||
auto column_from_part = getColumnFromPart(*name_and_type);
|
||||
auto column_from_part = getColumnInPart(*name_and_type);
|
||||
const auto & [name, type] = column_from_part;
|
||||
|
||||
/// The column is already present in the block so we will append the values to the end.
|
||||
|
13
tests/queries/0_stateless/01825_type_json_sparse.reference
Normal file
13
tests/queries/0_stateless/01825_type_json_sparse.reference
Normal file
@ -0,0 +1,13 @@
|
||||
k1 Default 2
|
||||
k2.k3 Default 1
|
||||
=============
|
||||
k1 Default 1
|
||||
k2.k3 Sparse 1
|
||||
=============
|
||||
k1 Default 1
|
||||
k2.k3 Sparse 1
|
||||
=============
|
||||
k1 Default 1
|
||||
k2.k3 Sparse 1
|
||||
1 1 4
|
||||
2 400000 0
|
59
tests/queries/0_stateless/01825_type_json_sparse.sql
Normal file
59
tests/queries/0_stateless/01825_type_json_sparse.sql
Normal file
@ -0,0 +1,59 @@
|
||||
-- Tags: no-fasttest
|
||||
|
||||
DROP TABLE IF EXISTS t_json_sparse;
|
||||
|
||||
SET allow_experimental_object_type = 1;
|
||||
|
||||
CREATE TABLE t_json_sparse (data JSON)
|
||||
ENGINE = MergeTree ORDER BY tuple()
|
||||
SETTINGS ratio_of_defaults_for_sparse_serialization = 0.1,
|
||||
min_bytes_for_wide_part = 0;
|
||||
|
||||
SYSTEM STOP MERGES t_json_sparse;
|
||||
|
||||
INSERT INTO t_json_sparse VALUES ('{"k1": 1, "k2": {"k3": 4}}');
|
||||
INSERT INTO t_json_sparse SELECT '{"k1": 2}' FROM numbers(200000);
|
||||
|
||||
SELECT subcolumns.names, subcolumns.serializations, count() FROM system.parts_columns
|
||||
ARRAY JOIN subcolumns
|
||||
WHERE database = currentDatabase()
|
||||
AND table = 't_json_sparse' AND column = 'data' AND active
|
||||
GROUP BY subcolumns.names, subcolumns.serializations;
|
||||
|
||||
SELECT '=============';
|
||||
|
||||
SYSTEM START MERGES t_json_sparse;
|
||||
OPTIMIZE TABLE t_json_sparse FINAL;
|
||||
|
||||
SELECT subcolumns.names, subcolumns.serializations, count() FROM system.parts_columns
|
||||
ARRAY JOIN subcolumns
|
||||
WHERE database = currentDatabase()
|
||||
AND table = 't_json_sparse' AND column = 'data' AND active
|
||||
GROUP BY subcolumns.names, subcolumns.serializations;
|
||||
|
||||
SELECT '=============';
|
||||
|
||||
DETACH TABLE t_json_sparse;
|
||||
ATTACH TABLE t_json_sparse;
|
||||
|
||||
SELECT subcolumns.names, subcolumns.serializations, count() FROM system.parts_columns
|
||||
ARRAY JOIN subcolumns
|
||||
WHERE database = currentDatabase()
|
||||
AND table = 't_json_sparse' AND column = 'data' AND active
|
||||
GROUP BY subcolumns.names, subcolumns.serializations;
|
||||
|
||||
INSERT INTO t_json_sparse SELECT '{"k1": 2}' FROM numbers(200000);
|
||||
|
||||
SELECT '=============';
|
||||
|
||||
OPTIMIZE TABLE t_json_sparse FINAL;
|
||||
|
||||
SELECT subcolumns.names, subcolumns.serializations, count() FROM system.parts_columns
|
||||
ARRAY JOIN subcolumns
|
||||
WHERE database = currentDatabase()
|
||||
AND table = 't_json_sparse' AND column = 'data' AND active
|
||||
GROUP BY subcolumns.names, subcolumns.serializations;
|
||||
|
||||
SELECT data.k1, count(), sum(data.k2.k3) FROM t_json_sparse GROUP BY data.k1;
|
||||
|
||||
DROP TABLE t_json_sparse;
|
Loading…
Reference in New Issue
Block a user