mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-18 13:42:02 +00:00
implement nested with multiple nesting
This commit is contained in:
parent
5242c4f4b9
commit
01a04e06ef
@ -10,12 +10,12 @@
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <DataTypes/DataTypeArray.h>
|
||||
#include <DataTypes/DataTypeFactory.h>
|
||||
#include <DataTypes/DataTypeOneElementTuple.h>
|
||||
|
||||
#include <Parsers/IAST.h>
|
||||
|
||||
#include <Common/typeid_cast.h>
|
||||
#include <Common/assert_cast.h>
|
||||
#include <Common/escapeForFileName.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
|
||||
#include <Core/NamesAndTypes.h>
|
||||
@ -32,8 +32,8 @@ namespace ErrorCodes
|
||||
}
|
||||
|
||||
|
||||
DataTypeArray::DataTypeArray(const DataTypePtr & nested_, size_t nested_level_)
|
||||
: nested{nested_}, nested_level{nested_level_}
|
||||
DataTypeArray::DataTypeArray(const DataTypePtr & nested_)
|
||||
: nested{nested_}
|
||||
{
|
||||
}
|
||||
|
||||
@ -527,7 +527,7 @@ DataTypePtr DataTypeArray::tryGetSubcolumnType(const String & subcolumn_name) co
|
||||
DataTypePtr DataTypeArray::tryGetSubcolumnTypeImpl(const String & subcolumn_name, size_t level) const
|
||||
{
|
||||
if (subcolumn_name == "size" + std::to_string(level))
|
||||
return std::make_shared<DataTypeUInt64>();
|
||||
return std::make_shared<DataTypeOneElementTuple>(std::make_shared<DataTypeUInt64>(), subcolumn_name, false);
|
||||
|
||||
DataTypePtr subcolumn;
|
||||
if (const auto * nested_array = typeid_cast<const DataTypeArray *>(nested.get()))
|
||||
@ -535,7 +535,7 @@ DataTypePtr DataTypeArray::tryGetSubcolumnTypeImpl(const String & subcolumn_name
|
||||
else
|
||||
subcolumn = nested->tryGetSubcolumnType(subcolumn_name);
|
||||
|
||||
return (subcolumn ? std::make_shared<DataTypeArray>(std::move(subcolumn), nested_level + 1) : subcolumn);
|
||||
return (subcolumn ? std::make_shared<DataTypeArray>(std::move(subcolumn)) : subcolumn);
|
||||
}
|
||||
|
||||
MutableColumnPtr DataTypeArray::getSubcolumn(const String & subcolumn_name, IColumn & column) const
|
||||
|
@ -13,12 +13,10 @@ private:
|
||||
/// The type of array elements.
|
||||
DataTypePtr nested;
|
||||
|
||||
size_t nested_level = 0;
|
||||
|
||||
public:
|
||||
static constexpr bool is_parametric = true;
|
||||
|
||||
DataTypeArray(const DataTypePtr & nested_, size_t nested_level_ = 0);
|
||||
DataTypeArray(const DataTypePtr & nested_);
|
||||
|
||||
TypeIndex getTypeId() const override { return TypeIndex::Array; }
|
||||
|
||||
@ -37,8 +35,6 @@ public:
|
||||
return false;
|
||||
}
|
||||
|
||||
size_t getNestedLevel() const override { return nested_level; }
|
||||
|
||||
void serializeBinary(const Field & field, WriteBuffer & ostr) const override;
|
||||
void deserializeBinary(Field & field, ReadBuffer & istr) const override;
|
||||
void serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr) const override;
|
||||
|
@ -180,6 +180,7 @@ DataTypeFactory::DataTypeFactory()
|
||||
registerDataTypeDomainIPv4AndIPv6(*this);
|
||||
registerDataTypeDomainSimpleAggregateFunction(*this);
|
||||
registerDataTypeDomainGeo(*this);
|
||||
registerDataTypeOneElementTuple(*this);
|
||||
}
|
||||
|
||||
DataTypeFactory & DataTypeFactory::instance()
|
||||
|
@ -83,5 +83,6 @@ void registerDataTypeLowCardinality(DataTypeFactory & factory);
|
||||
void registerDataTypeDomainIPv4AndIPv6(DataTypeFactory & factory);
|
||||
void registerDataTypeDomainSimpleAggregateFunction(DataTypeFactory & factory);
|
||||
void registerDataTypeDomainGeo(DataTypeFactory & factory);
|
||||
void registerDataTypeOneElementTuple(DataTypeFactory & factory);
|
||||
|
||||
}
|
||||
|
68
src/DataTypes/DataTypeNested.cpp
Normal file
68
src/DataTypes/DataTypeNested.cpp
Normal file
@ -0,0 +1,68 @@
|
||||
#include <DataTypes/DataTypeNested.h>
|
||||
#include <DataTypes/DataTypeFactory.h>
|
||||
#include <DataTypes/DataTypeArray.h>
|
||||
#include <DataTypes/DataTypeTuple.h>
|
||||
#include <IO/WriteBufferFromString.h>
|
||||
#include <IO/Operators.h>
|
||||
#include <Common/quoteString.h>
|
||||
#include <Parsers/ASTNameTypePair.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int EMPTY_DATA_PASSED;
|
||||
extern const int BAD_ARGUMENTS;
|
||||
}
|
||||
|
||||
/// Renders the type name as `Nested(name1 Type1, name2 Type2, ...)`.
/// Each element name is passed through backQuoteIfNeed before being printed.
String DataTypeNestedCustomName::getName() const
{
    WriteBufferFromOwnString out;
    out << "Nested(";

    const size_t num_elems = elems.size();
    for (size_t pos = 0; pos != num_elems; ++pos)
    {
        /// Every element except the first one is preceded by a separator.
        if (pos > 0)
            out << ", ";

        out << backQuoteIfNeed(names[pos]) << ' ' << elems[pos]->getName();
    }

    out << ")";
    return out.str();
}
|
||||
|
||||
static std::pair<DataTypePtr, DataTypeCustomDescPtr> create(const ASTPtr & arguments)
|
||||
{
|
||||
if (!arguments || arguments->children.empty())
|
||||
throw Exception("Nested cannot be empty", ErrorCodes::EMPTY_DATA_PASSED);
|
||||
|
||||
DataTypes nested_types;
|
||||
Strings nested_names;
|
||||
nested_types.reserve(arguments->children.size());
|
||||
nested_names.reserve(arguments->children.size());
|
||||
|
||||
for (const auto & child : arguments->children)
|
||||
{
|
||||
const auto * name_type = child->as<ASTNameTypePair>();
|
||||
if (!name_type)
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Data type Nested accepts only pairs with name and type");
|
||||
|
||||
auto nested_type = DataTypeFactory::instance().get(name_type->type);
|
||||
nested_types.push_back(std::move(nested_type));
|
||||
nested_names.push_back(name_type->name);
|
||||
}
|
||||
|
||||
auto data_type = std::make_shared<DataTypeArray>(std::make_shared<DataTypeTuple>(nested_types, nested_names));
|
||||
auto custom_name = std::make_unique<DataTypeNestedCustomName>(nested_types, nested_names);
|
||||
|
||||
return std::make_pair(std::move(data_type), std::make_unique<DataTypeCustomDesc>(std::move(custom_name), nullptr));
|
||||
}
|
||||
|
||||
/// Registers the `Nested` type name in the factory as a custom data type
/// that is built by create() above.
void registerDataTypeNested(DataTypeFactory & factory)
{
    /// No `return` here: the function is void, and the sibling
    /// registerDataTypeOneElementTuple() is written the same way.
    factory.registerDataTypeCustom("Nested", create);
}
|
||||
|
||||
}
|
32
src/DataTypes/DataTypeNested.h
Normal file
32
src/DataTypes/DataTypeNested.h
Normal file
@ -0,0 +1,32 @@
|
||||
#pragma once
|
||||
|
||||
#include <DataTypes/DataTypeWithSimpleSerialization.h>
|
||||
#include <DataTypes/DataTypeCustom.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
class DataTypeNestedCustomName final : public IDataTypeCustomName
|
||||
{
|
||||
private:
|
||||
DataTypes elems;
|
||||
Strings names;
|
||||
|
||||
public:
|
||||
DataTypeNestedCustomName(const DataTypes & elems_, const Strings & names_)
|
||||
: elems(elems_), names(names_)
|
||||
{
|
||||
}
|
||||
|
||||
String getName() const override;
|
||||
};
|
||||
|
||||
template <typename DataType>
|
||||
inline bool isNested(const DataType & data_type)
|
||||
{
|
||||
return isArray(data_type) && typeid_cast<const DataTypeNestedCustomName *>(data_type->getCustomName());
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -2,6 +2,7 @@
|
||||
#include <DataTypes/DataTypeNothing.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <DataTypes/DataTypeFactory.h>
|
||||
#include <DataTypes/DataTypeOneElementTuple.h>
|
||||
#include <Columns/ColumnNullable.h>
|
||||
#include <Core/Field.h>
|
||||
#include <Core/NamesAndTypes.h>
|
||||
@ -530,7 +531,7 @@ bool DataTypeNullable::equals(const IDataType & rhs) const
|
||||
DataTypePtr DataTypeNullable::tryGetSubcolumnType(const String & subcolumn_name) const
|
||||
{
|
||||
if (subcolumn_name == "null")
|
||||
return std::make_shared<DataTypeUInt8>();
|
||||
return std::make_shared<DataTypeOneElementTuple>(std::make_shared<DataTypeUInt8>(), subcolumn_name, false);
|
||||
|
||||
return nested_data_type->tryGetSubcolumnType(subcolumn_name);
|
||||
}
|
||||
|
121
src/DataTypes/DataTypeOneElementTuple.cpp
Normal file
121
src/DataTypes/DataTypeOneElementTuple.cpp
Normal file
@ -0,0 +1,121 @@
|
||||
#include <Core/Field.h>
|
||||
#include <Columns/IColumn.h>
|
||||
#include <DataTypes/DataTypeOneElementTuple.h>
|
||||
#include <DataTypes/DataTypeFactory.h>
|
||||
#include <IO/WriteBufferFromString.h>
|
||||
#include <IO/Operators.h>
|
||||
#include <Common/quoteString.h>
|
||||
#include <Common/typeid_cast.h>
|
||||
#include <Parsers/IAST.h>
|
||||
#include <Parsers/ASTNameTypePair.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int LOGICAL_ERROR;
|
||||
}
|
||||
|
||||
/// Appends a TupleElement substream to `path` and fills it with this type's
/// element name and delimiter-escaping flag.
void DataTypeOneElementTuple::addToPath(SubstreamPath & path) const
{
    path.push_back(Substream::TupleElement);

    auto & tuple_substream = path.back();
    tuple_substream.tuple_element_name = name;
    tuple_substream.escape_tuple_delimiter = escape_delimiter;
}
|
||||
|
||||
std::string DataTypeOneElementTuple::doGetName() const
|
||||
{
|
||||
WriteBufferFromOwnString s;
|
||||
s << TYPE_NAME << "(" << backQuoteIfNeed(name) << " " << element->getName() << ")";
|
||||
return s.str();
|
||||
}
|
||||
|
||||
bool DataTypeOneElementTuple::equals(const IDataType & rhs) const
|
||||
{
|
||||
const auto * rhs_tuple = typeid_cast<const DataTypeOneElementTuple *>(&rhs);
|
||||
if (!rhs_tuple)
|
||||
return false;
|
||||
|
||||
return element->equals(*rhs_tuple->element)
|
||||
&& name == rhs_tuple->name
|
||||
&& escape_delimiter == rhs_tuple->escape_delimiter;
|
||||
}
|
||||
|
||||
void DataTypeOneElementTuple::enumerateStreams(const StreamCallback & callback, SubstreamPath & path) const
|
||||
{
|
||||
addToPath(path);
|
||||
element->enumerateStreams(callback, path);
|
||||
path.pop_back();
|
||||
}
|
||||
|
||||
/// Writes the serialization state prefix of the wrapped element type.
/// The TupleElement entry is added to `settings.path` for the duration of
/// the delegated call and removed afterwards.
/// Note: the entry is not removed if the delegated call throws.
void DataTypeOneElementTuple::serializeBinaryBulkStatePrefix(
    SerializeBinaryBulkSettings & settings,
    SerializeBinaryBulkStatePtr & state) const
{
    addToPath(settings.path);
    element->serializeBinaryBulkStatePrefix(settings, state);
    settings.path.pop_back();
}
|
||||
|
||||
/// Writes the serialization state suffix of the wrapped element type.
/// The TupleElement entry is added to `settings.path` for the duration of
/// the delegated call and removed afterwards.
/// Note: the entry is not removed if the delegated call throws.
void DataTypeOneElementTuple::serializeBinaryBulkStateSuffix(
    SerializeBinaryBulkSettings & settings,
    SerializeBinaryBulkStatePtr & state) const
{
    auto & path = settings.path;

    addToPath(path);
    element->serializeBinaryBulkStateSuffix(settings, state);
    path.pop_back();
}
|
||||
|
||||
/// Reads the deserialization state prefix of the wrapped element type.
/// The TupleElement entry is added to `settings.path` for the duration of
/// the delegated call and removed afterwards.
/// Note: the entry is not removed if the delegated call throws.
void DataTypeOneElementTuple::deserializeBinaryBulkStatePrefix(
    DeserializeBinaryBulkSettings & settings,
    DeserializeBinaryBulkStatePtr & state) const
{
    auto & path = settings.path;

    addToPath(path);
    element->deserializeBinaryBulkStatePrefix(settings, state);
    path.pop_back();
}
|
||||
|
||||
void DataTypeOneElementTuple::serializeBinaryBulkWithMultipleStreams(
|
||||
const IColumn & column,
|
||||
size_t offset,
|
||||
size_t limit,
|
||||
SerializeBinaryBulkSettings & settings,
|
||||
SerializeBinaryBulkStatePtr & state) const
|
||||
{
|
||||
addToPath(settings.path);
|
||||
element->serializeBinaryBulkWithMultipleStreams(column, offset, limit, settings, state);
|
||||
settings.path.pop_back();
|
||||
}
|
||||
|
||||
/// Deserializes into `column` with the wrapped element type; `limit` is
/// forwarded unchanged. The TupleElement entry is added to `settings.path`
/// for the duration of the delegated call and removed afterwards.
/// Note: the entry is not removed if the delegated call throws.
void DataTypeOneElementTuple::deserializeBinaryBulkWithMultipleStreams(
    IColumn & column,
    size_t limit,
    DeserializeBinaryBulkSettings & settings,
    DeserializeBinaryBulkStatePtr & state) const
{
    auto & path = settings.path;

    addToPath(path);
    element->deserializeBinaryBulkWithMultipleStreams(column, limit, settings, state);
    path.pop_back();
}
|
||||
|
||||
/// Factory callback for DataTypeOneElementTuple.
/// Expects exactly one argument, which must be a `name Type` pair; throws
/// LOGICAL_ERROR otherwise.
/// NOTE(review): the escape flag is not passed here, so types created from
/// an AST always use the constructor's default for escape_delimiter.
static DataTypePtr create(const ASTPtr & arguments)
{
    const bool has_single_argument = arguments && arguments->children.size() == 1;
    if (!has_single_argument)
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Data type {} takes only 1 argument", DataTypeOneElementTuple::TYPE_NAME);

    const auto * pair = arguments->children[0]->as<ASTNameTypePair>();
    if (pair == nullptr)
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Data type {} takes only pair with name and type", DataTypeOneElementTuple::TYPE_NAME);

    /// The element type is resolved through the factory from its AST.
    auto element_type = DataTypeFactory::instance().get(pair->type);
    return std::make_shared<DataTypeOneElementTuple>(std::move(element_type), pair->name);
}
|
||||
|
||||
/// Registers DataTypeOneElementTuple in the factory under its TYPE_NAME,
/// with create() above as the constructor callback.
void registerDataTypeOneElementTuple(DataTypeFactory & factory)
{
    const auto & type_name = DataTypeOneElementTuple::TYPE_NAME;
    factory.registerDataType(type_name, create);
}
|
||||
|
||||
}
|
103
src/DataTypes/DataTypeOneElementTuple.h
Normal file
103
src/DataTypes/DataTypeOneElementTuple.h
Normal file
@ -0,0 +1,103 @@
|
||||
#pragma once
|
||||
|
||||
#include <DataTypes/DataTypeWithSimpleSerialization.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
class DataTypeOneElementTuple final : public DataTypeWithSimpleSerialization
|
||||
{
|
||||
private:
|
||||
DataTypePtr element;
|
||||
String name;
|
||||
bool escape_delimiter;
|
||||
|
||||
public:
|
||||
static constexpr bool is_parametric = true;
|
||||
static constexpr auto TYPE_NAME = "__OneElementTuple";
|
||||
|
||||
DataTypeOneElementTuple(const DataTypePtr & element_, const String & name_, bool escape_delimiter_ = true)
|
||||
: element(element_), name(name_), escape_delimiter(escape_delimiter_) {}
|
||||
|
||||
/// Customized methods.
|
||||
const char * getFamilyName() const override { return TYPE_NAME; }
|
||||
std::string doGetName() const override;
|
||||
|
||||
void enumerateStreams(const StreamCallback & callback, SubstreamPath & path) const override;
|
||||
|
||||
void serializeBinaryBulkStatePrefix(
|
||||
SerializeBinaryBulkSettings & settings,
|
||||
SerializeBinaryBulkStatePtr & state) const override;
|
||||
|
||||
void serializeBinaryBulkStateSuffix(
|
||||
SerializeBinaryBulkSettings & settings,
|
||||
SerializeBinaryBulkStatePtr & state) const override;
|
||||
|
||||
void deserializeBinaryBulkStatePrefix(
|
||||
DeserializeBinaryBulkSettings & settings,
|
||||
DeserializeBinaryBulkStatePtr & state) const override;
|
||||
|
||||
void serializeBinaryBulkWithMultipleStreams(
|
||||
const IColumn & column,
|
||||
size_t offset,
|
||||
size_t limit,
|
||||
SerializeBinaryBulkSettings & settings,
|
||||
SerializeBinaryBulkStatePtr & state) const override;
|
||||
|
||||
void deserializeBinaryBulkWithMultipleStreams(
|
||||
IColumn & column,
|
||||
size_t limit,
|
||||
DeserializeBinaryBulkSettings & settings,
|
||||
DeserializeBinaryBulkStatePtr & state) const override;
|
||||
|
||||
bool equals(const IDataType & rhs) const override;
|
||||
|
||||
bool isParametric() const override { return true; }
|
||||
bool haveSubtypes() const override { return true; }
|
||||
|
||||
/// Non-customized methods.
|
||||
TypeIndex getTypeId() const override { return element->getTypeId(); }
|
||||
|
||||
void serializeBinary(const Field & field, WriteBuffer & ostr) const override { element->serializeBinary(field, ostr); }
|
||||
void deserializeBinary(Field & field, ReadBuffer & istr) const override { element->deserializeBinary(field, istr); }
|
||||
void serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr) const override { element->serializeBinary(column, row_num, ostr); }
|
||||
void deserializeBinary(IColumn & column, ReadBuffer & istr) const override { element->deserializeBinary(column, istr); }
|
||||
|
||||
void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override
|
||||
{
|
||||
element->serializeAsText(column, row_num, ostr, settings);
|
||||
}
|
||||
|
||||
void deserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override
|
||||
{
|
||||
element->deserializeAsWholeText(column, istr, settings);
|
||||
}
|
||||
|
||||
void serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const override
|
||||
{
|
||||
element->serializeProtobuf(column, row_num, protobuf, value_index);
|
||||
}
|
||||
|
||||
void deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const override
|
||||
{
|
||||
element->deserializeProtobuf(column, protobuf, allow_add_row, row_added);
|
||||
}
|
||||
|
||||
bool canBeInsideNullable() const override { return element->canBeInsideNullable(); }
|
||||
MutableColumnPtr createColumn() const override { return element->createColumn(); }
|
||||
Field getDefault() const override { return element->getDefault(); }
|
||||
void insertDefaultInto(IColumn & column) const override { element->insertDefaultInto(column); }
|
||||
bool isComparable() const override { return element->isComparable(); }
|
||||
bool textCanContainOnlyValidUTF8() const override { return element->textCanContainOnlyValidUTF8(); }
|
||||
bool haveMaximumSizeOfValue() const override { return element->haveMaximumSizeOfValue(); }
|
||||
size_t getMaximumSizeOfValueInMemory() const override { return element->getMaximumSizeOfValueInMemory(); }
|
||||
size_t getSizeOfValueInMemory() const override { return element->getSizeOfValueInMemory(); }
|
||||
|
||||
DataTypePtr tryGetSubcolumnType(const String & subcolumn_name) const override { return element->tryGetSubcolumnType(subcolumn_name); }
|
||||
MutableColumnPtr getSubcolumn(const String & subcolumn_name, IColumn & column) const override { return element->getSubcolumn(subcolumn_name, column); }
|
||||
|
||||
private:
|
||||
void addToPath(SubstreamPath & path) const;
|
||||
};
|
||||
|
||||
}
|
@ -5,6 +5,8 @@
|
||||
#include <DataTypes/DataTypeTuple.h>
|
||||
#include <DataTypes/DataTypeArray.h>
|
||||
#include <DataTypes/DataTypeFactory.h>
|
||||
#include <DataTypes/DataTypeOneElementTuple.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <Parsers/IAST.h>
|
||||
#include <Parsers/ASTNameTypePair.h>
|
||||
#include <Common/typeid_cast.h>
|
||||
@ -538,11 +540,14 @@ DataTypePtr DataTypeTuple::tryGetSubcolumnType(const String & subcolumn_name) co
|
||||
if (startsWith(subcolumn_name, names[i]))
|
||||
{
|
||||
size_t name_length = names[i].size();
|
||||
DataTypePtr subcolumn_type;
|
||||
if (subcolumn_name.size() == name_length)
|
||||
return elems[i];
|
||||
subcolumn_type = elems[i];
|
||||
else if (subcolumn_name[name_length] == '.')
|
||||
subcolumn_type = elems[i]->tryGetSubcolumnType(subcolumn_name.substr(name_length + 1));
|
||||
|
||||
if (subcolumn_name[name_length] == '.')
|
||||
return elems[i]->tryGetSubcolumnType(subcolumn_name.substr(name_length + 1));
|
||||
if (subcolumn_type)
|
||||
return std::make_shared<DataTypeOneElementTuple>(std::move(subcolumn_type), names[i]);
|
||||
}
|
||||
}
|
||||
|
||||
@ -604,15 +609,7 @@ static DataTypePtr create(const ASTPtr & arguments)
|
||||
void registerDataTypeTuple(DataTypeFactory & factory)
|
||||
{
|
||||
factory.registerDataType("Tuple", create);
|
||||
}
|
||||
|
||||
void registerDataTypeNested(DataTypeFactory & factory)
|
||||
{
|
||||
/// Nested(...) data type is just a sugar for Array(Tuple(...))
|
||||
factory.registerDataType("Nested", [&factory](const ASTPtr & arguments)
|
||||
{
|
||||
return std::make_shared<DataTypeArray>(factory.get("Tuple", arguments));
|
||||
});
|
||||
// factory.registerDataTypeCustom(DATA_TYPE_ONE_ELEMENT_TUPLE_NAME, createOneElementTuple);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -108,28 +108,33 @@ MutableColumnPtr IDataType::getSubcolumn(const String & subcolumn_name, IColumn
|
||||
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "There is no subcolumn {} in type {}", subcolumn_name, getName());
|
||||
}
|
||||
|
||||
std::vector<String> IDataType::getSubcolumnNames() const
|
||||
Names IDataType::getSubcolumnNames() const
|
||||
{
|
||||
std::vector<String> res;
|
||||
enumerateStreams([&res, this](const IDataType::SubstreamPath & substream_path, const IDataType & /* substream_type */)
|
||||
NameSet res;
|
||||
enumerateStreams([&res, this](const SubstreamPath & substream_path, const IDataType & /* substream_type */)
|
||||
{
|
||||
auto subcolumn_name = IDataType::getSubcolumnNameForStream("", substream_path);
|
||||
if (!subcolumn_name.empty())
|
||||
SubstreamPath new_path;
|
||||
/// Iterate over path to try to get intermediate subcolumns for complex nested types.
|
||||
for (const auto & elem : substream_path)
|
||||
{
|
||||
subcolumn_name = subcolumn_name.substr(1); // It starts with a dot.
|
||||
/// Not every substream has a corresponding subcolumn.
|
||||
if (tryGetSubcolumnType(subcolumn_name))
|
||||
res.push_back(subcolumn_name);
|
||||
new_path.push_back(elem);
|
||||
auto subcolumn_name = getSubcolumnNameForStream(new_path);
|
||||
if (!subcolumn_name.empty() && tryGetSubcolumnType(subcolumn_name))
|
||||
{
|
||||
/// Not every substream has a corresponding subcolumn.
|
||||
if (tryGetSubcolumnType(subcolumn_name))
|
||||
res.insert(subcolumn_name);
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
return res;
|
||||
return Names(std::make_move_iterator(res.begin()), std::make_move_iterator(res.end()));
|
||||
}
|
||||
|
||||
static String getNameForSubstreamPath(
|
||||
String stream_name,
|
||||
const IDataType::SubstreamPath & path,
|
||||
const String & tuple_element_delimeter = ".")
|
||||
bool escape_tuple_delimiter)
|
||||
{
|
||||
size_t array_level = 0;
|
||||
for (const auto & elem : path)
|
||||
@ -140,80 +145,41 @@ static String getNameForSubstreamPath(
|
||||
stream_name += ".size" + toString(array_level);
|
||||
else if (elem.type == IDataType::Substream::ArrayElements)
|
||||
++array_level;
|
||||
else if (elem.type == IDataType::Substream::TupleElement)
|
||||
stream_name += tuple_element_delimeter + escapeForFileName(elem.tuple_element_name);
|
||||
else if (elem.type == IDataType::Substream::DictionaryKeys)
|
||||
stream_name += ".dict";
|
||||
else if (elem.type == IDataType::Substream::TupleElement)
|
||||
{
|
||||
/// For compatibility reasons, we use %2E (escaped dot) instead of dot.
|
||||
/// Because nested data may be represented not by Array of Tuple,
|
||||
/// but by separate Array columns with names in a form of a.b,
|
||||
/// and name is encoded as a whole.
|
||||
stream_name += (escape_tuple_delimiter && elem.escape_tuple_delimiter ?
|
||||
escapeForFileName(".") : ".") + escapeForFileName(elem.tuple_element_name);
|
||||
}
|
||||
}
|
||||
|
||||
return stream_name;
|
||||
}
|
||||
|
||||
static bool isOldStyleNestedSizes(const NameAndTypePair & column, const IDataType::SubstreamPath & path)
|
||||
String IDataType::getFileNameForStream(const NameAndTypePair & column, const SubstreamPath & path)
|
||||
{
|
||||
auto storage_name = column.getStorageName();
|
||||
auto nested_storage_name = Nested::extractTableName(column.getStorageName());
|
||||
|
||||
if (storage_name == nested_storage_name)
|
||||
return false;
|
||||
|
||||
return (path.size() == 1 && path[0].type == IDataType::Substream::ArraySizes) || column.getSubcolumnName() == "size0";
|
||||
}
|
||||
|
||||
static String getDelimiterForSubcolumnPart(const String & subcolumn_part)
|
||||
{
|
||||
if (subcolumn_part == "null" || startsWith(subcolumn_part, "size"))
|
||||
return ".";
|
||||
|
||||
return "%2E";
|
||||
}
|
||||
|
||||
/// FIXME: rewrite it.
|
||||
String IDataType::getFileNameForStream(const NameAndTypePair & column, const IDataType::SubstreamPath & path)
|
||||
{
|
||||
auto storage_name = column.getStorageName();
|
||||
if (isOldStyleNestedSizes(column, path))
|
||||
storage_name = Nested::extractTableName(storage_name);
|
||||
if (storage_name != nested_storage_name && (path.size() == 1 && path[0].type == IDataType::Substream::ArraySizes))
|
||||
storage_name = nested_storage_name;
|
||||
|
||||
auto stream_name = escapeForFileName(storage_name);
|
||||
auto subcolumn_name = column.getSubcolumnName();
|
||||
|
||||
if (!subcolumn_name.empty())
|
||||
{
|
||||
std::vector<String> subcolumn_parts;
|
||||
boost::split(subcolumn_parts, subcolumn_name, [](char c) { return c == '.'; });
|
||||
|
||||
size_t current_nested_level = 0;
|
||||
for (const auto & elem : path)
|
||||
{
|
||||
if (elem.type == Substream::ArrayElements && elem.is_part_of_nested)
|
||||
{
|
||||
++current_nested_level;
|
||||
}
|
||||
else if (elem.type == Substream::ArraySizes)
|
||||
{
|
||||
size_t nested_level = column.type->getNestedLevel();
|
||||
for (size_t i = 0; i < nested_level - current_nested_level; ++i)
|
||||
{
|
||||
if (subcolumn_parts.empty())
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot get substream name for column {}."
|
||||
" Not enough subcolumn parts. Needed: {}", column.name, nested_level - current_nested_level);
|
||||
|
||||
subcolumn_parts.pop_back();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (const auto & subcolumn_part : subcolumn_parts)
|
||||
stream_name += getDelimiterForSubcolumnPart(subcolumn_part) + escapeForFileName(subcolumn_part);
|
||||
}
|
||||
|
||||
return getNameForSubstreamPath(std::move(stream_name), path, "%2E");
|
||||
return getNameForSubstreamPath(std::move(stream_name), path, true);
|
||||
}
|
||||
|
||||
String IDataType::getSubcolumnNameForStream(String stream_name, const SubstreamPath & path)
|
||||
String IDataType::getSubcolumnNameForStream(const SubstreamPath & path)
|
||||
{
|
||||
return getNameForSubstreamPath(std::move(stream_name), path);
|
||||
auto subcolumn_name = getNameForSubstreamPath("", path, false);
|
||||
if (!subcolumn_name.empty())
|
||||
subcolumn_name = subcolumn_name.substr(1); // It starts with a dot.
|
||||
|
||||
return subcolumn_name;
|
||||
}
|
||||
|
||||
bool IDataType::isSpecialCompressionAllowed(const SubstreamPath & path)
|
||||
|
@ -4,6 +4,7 @@
|
||||
#include <Common/COW.h>
|
||||
#include <boost/noncopyable.hpp>
|
||||
#include <DataTypes/DataTypeCustom.h>
|
||||
#include <Core/Names.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -101,7 +102,7 @@ public:
|
||||
/// Index of tuple element, starting at 1.
|
||||
String tuple_element_name;
|
||||
|
||||
bool is_part_of_nested = false;
|
||||
bool escape_tuple_delimiter = true;
|
||||
|
||||
Substream(Type type_) : type(type_) {}
|
||||
};
|
||||
@ -120,7 +121,7 @@ public:
|
||||
virtual DataTypePtr tryGetSubcolumnType(const String & /* subcolumn_name */) const { return nullptr; }
|
||||
DataTypePtr getSubcolumnType(const String & subcolumn_name) const;
|
||||
virtual MutableColumnPtr getSubcolumn(const String & subcolumn_name, IColumn & column) const;
|
||||
std::vector<String> getSubcolumnNames() const;
|
||||
Names getSubcolumnNames() const;
|
||||
|
||||
using OutputStreamGetter = std::function<WriteBuffer*(const SubstreamPath &)>;
|
||||
using InputStreamGetter = std::function<ReadBuffer*(const SubstreamPath &)>;
|
||||
@ -449,13 +450,11 @@ public:
|
||||
/// Strings, Numbers, Date, DateTime, Nullable
|
||||
virtual bool canBeInsideLowCardinality() const { return false; }
|
||||
|
||||
virtual size_t getNestedLevel() const { return 0; }
|
||||
|
||||
/// Updates avg_value_size_hint for a newly read column. Used to optimize deserialization. Zero is expected for the first column.
|
||||
static void updateAvgValueSizeHint(const IColumn & column, double & avg_value_size_hint);
|
||||
|
||||
static String getFileNameForStream(const NameAndTypePair & column, const SubstreamPath & path);
|
||||
static String getSubcolumnNameForStream(String stream_name, const SubstreamPath & path);
|
||||
static String getSubcolumnNameForStream(const SubstreamPath & path);
|
||||
|
||||
/// Substream path supports special compression methods like codec Delta.
|
||||
/// For all other substreams (like ArraySizes, NullMasks, etc.) we use only
|
||||
|
@ -27,9 +27,11 @@ SRCS(
|
||||
DataTypeInterval.cpp
|
||||
DataTypeLowCardinality.cpp
|
||||
DataTypeLowCardinalityHelpers.cpp
|
||||
DataTypeNested.cpp
|
||||
DataTypeNothing.cpp
|
||||
DataTypeNullable.cpp
|
||||
DataTypeNumberBase.cpp
|
||||
DataTypeOneElementTuple.cpp
|
||||
DataTypesDecimal.cpp
|
||||
DataTypesNumber.cpp
|
||||
DataTypeString.cpp
|
||||
|
@ -16,6 +16,8 @@ public:
|
||||
/// type
|
||||
ASTPtr type;
|
||||
|
||||
size_t level_of_nesting = 0;
|
||||
|
||||
/** Get the text that identifies this element. */
|
||||
String getID(char delim) const override { return "NameTypePair" + (delim + name); }
|
||||
ASTPtr clone() const override;
|
||||
|
@ -82,7 +82,7 @@ bool ParserDataType::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
|
||||
/// Parse optional parameters
|
||||
ASTPtr expr_list_args;
|
||||
|
||||
ParserList args_parser_nested(std::make_unique<ParserNestedTable>(), std::make_unique<ParserToken>(TokenType::Comma), false);
|
||||
ParserList args_parser_nested(std::make_unique<ParserDataType>(), std::make_unique<ParserToken>(TokenType::Comma), false);
|
||||
if (args_parser_nested.parse(pos, expr_list_args, expected))
|
||||
{
|
||||
if (pos->type != TokenType::ClosingRoundBracket)
|
||||
|
@ -19,6 +19,7 @@
|
||||
#include <DataTypes/NestedUtils.h>
|
||||
#include <DataTypes/DataTypeArray.h>
|
||||
#include <DataTypes/DataTypeTuple.h>
|
||||
#include <DataTypes/DataTypeNested.h>
|
||||
#include <Common/Exception.h>
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Storages/IStorage.h>
|
||||
@ -252,6 +253,12 @@ void ColumnsDescription::flattenNested()
|
||||
{
|
||||
for (auto it = columns.begin(); it != columns.end();)
|
||||
{
|
||||
if (isNested(it->type))
|
||||
{
|
||||
++it;
|
||||
continue;
|
||||
}
|
||||
|
||||
const auto * type_arr = typeid_cast<const DataTypeArray *>(it->type.get());
|
||||
if (!type_arr)
|
||||
{
|
||||
|
@ -54,8 +54,10 @@ MergeTreeReaderCompact::MergeTreeReaderCompact(
|
||||
for (size_t i = 0; i < columns_num; ++i, ++name_and_type)
|
||||
{
|
||||
auto column_from_part = getColumnFromPart(*name_and_type);
|
||||
auto position = data_part->getColumnPosition(column_from_part);
|
||||
if (duplicated_subcolumns.count(column_from_part.name))
|
||||
continue;
|
||||
|
||||
auto position = data_part->getColumnPosition(column_from_part);
|
||||
if (!position && typeid_cast<const DataTypeArray *>(column_from_part.type.get()))
|
||||
{
|
||||
/// If array of Nested column is missing in part,
|
||||
@ -148,11 +150,10 @@ size_t MergeTreeReaderCompact::readRows(size_t from_mark, bool continue_reading,
|
||||
{
|
||||
auto column_from_part = getColumnFromPart(*name_and_type);
|
||||
|
||||
if (!res_columns[pos] || duplicated_subcolumns.count(column_from_part.name))
|
||||
if (!res_columns[pos])
|
||||
continue;
|
||||
|
||||
auto & column = mutable_columns[pos];
|
||||
|
||||
try
|
||||
{
|
||||
size_t column_size_before_reading = column->size();
|
||||
@ -160,7 +161,6 @@ size_t MergeTreeReaderCompact::readRows(size_t from_mark, bool continue_reading,
|
||||
readData(column_from_part, *column, from_mark, *column_positions[pos], rows_to_read, read_only_offsets[pos]);
|
||||
|
||||
size_t read_rows_in_column = column->size() - column_size_before_reading;
|
||||
|
||||
if (read_rows_in_column < rows_to_read)
|
||||
throw Exception("Cannot read all data in MergeTreeReaderCompact. Rows read: " + toString(read_rows_in_column) +
|
||||
". Rows expected: " + toString(rows_to_read) + ".", ErrorCodes::CANNOT_READ_ALL_DATA);
|
||||
|
Loading…
Reference in New Issue
Block a user