mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-22 23:52:03 +00:00
fix deducing Array(Nested(...)) and Nested(Array(...))
This commit is contained in:
parent
0a7895ebb9
commit
9b5e702f6c
@ -327,14 +327,9 @@ namespace
|
|||||||
void flattenTupleImpl(
|
void flattenTupleImpl(
|
||||||
PathInDataBuilder & builder,
|
PathInDataBuilder & builder,
|
||||||
DataTypePtr type,
|
DataTypePtr type,
|
||||||
size_t array_level,
|
std::vector<PathInData::Parts> & new_paths,
|
||||||
PathsInData & new_paths,
|
|
||||||
DataTypes & new_types)
|
DataTypes & new_types)
|
||||||
{
|
{
|
||||||
bool is_nested = isNested(type);
|
|
||||||
if (is_nested)
|
|
||||||
type = assert_cast<const DataTypeArray &>(*type).getNestedType();
|
|
||||||
|
|
||||||
if (const auto * type_tuple = typeid_cast<const DataTypeTuple *>(type.get()))
|
if (const auto * type_tuple = typeid_cast<const DataTypeTuple *>(type.get()))
|
||||||
{
|
{
|
||||||
const auto & tuple_names = type_tuple->getElementNames();
|
const auto & tuple_names = type_tuple->getElementNames();
|
||||||
@ -342,19 +337,32 @@ void flattenTupleImpl(
|
|||||||
|
|
||||||
for (size_t i = 0; i < tuple_names.size(); ++i)
|
for (size_t i = 0; i < tuple_names.size(); ++i)
|
||||||
{
|
{
|
||||||
builder.append(tuple_names[i], is_nested);
|
builder.append(tuple_names[i], false);
|
||||||
flattenTupleImpl(builder, tuple_types[i], array_level + is_nested, new_paths, new_types);
|
flattenTupleImpl(builder, tuple_types[i], new_paths, new_types);
|
||||||
builder.popBack();
|
builder.popBack();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if (const auto * type_array = typeid_cast<const DataTypeArray *>(type.get()))
|
else if (const auto * type_array = typeid_cast<const DataTypeArray *>(type.get()))
|
||||||
{
|
{
|
||||||
flattenTupleImpl(builder, type_array->getNestedType(), array_level + 1, new_paths, new_types);
|
PathInDataBuilder element_builder;
|
||||||
|
std::vector<PathInData::Parts> element_paths;
|
||||||
|
DataTypes element_types;
|
||||||
|
|
||||||
|
flattenTupleImpl(element_builder, type_array->getNestedType(), element_paths, element_types);
|
||||||
|
assert(element_paths.size() == element_types.size());
|
||||||
|
|
||||||
|
for (size_t i = 0; i < element_paths.size(); ++i)
|
||||||
|
{
|
||||||
|
builder.append(element_paths[i], true);
|
||||||
|
new_paths.emplace_back(builder.getParts());
|
||||||
|
new_types.emplace_back(std::make_shared<DataTypeArray>(element_types[i]));
|
||||||
|
builder.popBack(element_paths[i].size());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
new_paths.emplace_back(builder.getParts());
|
new_paths.emplace_back(builder.getParts());
|
||||||
new_types.push_back(createArrayOfType(type, array_level));
|
new_types.emplace_back(type);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -428,16 +436,16 @@ struct ColumnWithTypeAndDimensions
|
|||||||
using SubcolumnsTreeWithTypes = SubcolumnsTree<ColumnWithTypeAndDimensions>;
|
using SubcolumnsTreeWithTypes = SubcolumnsTree<ColumnWithTypeAndDimensions>;
|
||||||
using Node = SubcolumnsTreeWithTypes::Node;
|
using Node = SubcolumnsTreeWithTypes::Node;
|
||||||
|
|
||||||
std::pair<ColumnPtr, DataTypePtr> createTypeFromNode(const Node * node)
|
ColumnWithTypeAndDimensions createTypeFromNode(const Node * node)
|
||||||
{
|
{
|
||||||
auto collect_tuple_elemets = [](const auto & children)
|
auto collect_tuple_elemets = [](const auto & children)
|
||||||
{
|
{
|
||||||
std::vector<std::tuple<String, ColumnPtr, DataTypePtr>> tuple_elements;
|
std::vector<std::tuple<String, ColumnWithTypeAndDimensions>> tuple_elements;
|
||||||
tuple_elements.reserve(children.size());
|
tuple_elements.reserve(children.size());
|
||||||
for (const auto & [name, child] : children)
|
for (const auto & [name, child] : children)
|
||||||
{
|
{
|
||||||
auto [column, type] = createTypeFromNode(child.get());
|
auto column = createTypeFromNode(child.get());
|
||||||
tuple_elements.emplace_back(name, std::move(column), std::move(type));
|
tuple_elements.emplace_back(name, std::move(column));
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Sort to always create the same type for the same set of subcolumns.
|
/// Sort to always create the same type for the same set of subcolumns.
|
||||||
@ -446,35 +454,44 @@ std::pair<ColumnPtr, DataTypePtr> createTypeFromNode(const Node * node)
|
|||||||
|
|
||||||
auto tuple_names = extractVector<0>(tuple_elements);
|
auto tuple_names = extractVector<0>(tuple_elements);
|
||||||
auto tuple_columns = extractVector<1>(tuple_elements);
|
auto tuple_columns = extractVector<1>(tuple_elements);
|
||||||
auto tuple_types = extractVector<2>(tuple_elements);
|
|
||||||
|
|
||||||
return std::make_tuple(tuple_names, tuple_columns, tuple_types);
|
return std::make_tuple(std::move(tuple_names), std::move(tuple_columns));
|
||||||
};
|
};
|
||||||
|
|
||||||
if (node->kind == Node::SCALAR)
|
if (node->kind == Node::SCALAR)
|
||||||
{
|
{
|
||||||
return {node->data.column, node->data.type};
|
return node->data;
|
||||||
}
|
}
|
||||||
else if (node->kind == Node::NESTED)
|
else if (node->kind == Node::NESTED)
|
||||||
{
|
{
|
||||||
|
auto [tuple_names, tuple_columns] = collect_tuple_elemets(node->children);
|
||||||
|
|
||||||
Columns offsets_columns;
|
Columns offsets_columns;
|
||||||
ColumnPtr current_column = node->data.column;
|
offsets_columns.reserve(tuple_columns[0].array_dimensions + 1);
|
||||||
|
|
||||||
assert(node->data.array_dimensions > 0);
|
const auto & current_array = assert_cast<const ColumnArray &>(*node->data.column);
|
||||||
offsets_columns.reserve(node->data.array_dimensions);
|
offsets_columns.push_back(current_array.getOffsetsPtr());
|
||||||
|
|
||||||
for (size_t i = 0; i < node->data.array_dimensions; ++i)
|
for (size_t i = 0; i < tuple_columns[0].array_dimensions; ++i)
|
||||||
{
|
{
|
||||||
const auto & column_array = assert_cast<const ColumnArray &>(*current_column);
|
const auto & column_array = assert_cast<const ColumnArray &>(*tuple_columns[0].column);
|
||||||
|
|
||||||
offsets_columns.push_back(column_array.getOffsetsPtr());
|
offsets_columns.push_back(column_array.getOffsetsPtr());
|
||||||
current_column = column_array.getDataPtr();
|
tuple_columns[0].column = column_array.getDataPtr();
|
||||||
}
|
}
|
||||||
|
|
||||||
auto [tuple_names, tuple_columns, tuple_types] = collect_tuple_elemets(node->children);
|
size_t num_elements = tuple_columns.size();
|
||||||
|
Columns tuple_elements_columns(num_elements);
|
||||||
|
DataTypes tuple_elements_types(num_elements);
|
||||||
|
|
||||||
auto result_column = ColumnArray::create(ColumnTuple::create(tuple_columns), offsets_columns.back());
|
for (size_t i = 0; i < num_elements; ++i)
|
||||||
auto result_type = createNested(tuple_types, tuple_names);
|
{
|
||||||
|
assert(tuple_columns[i].array_dimensions == tuple_columns[0].array_dimensions);
|
||||||
|
tuple_elements_columns[i] = reduceNumberOfDimensions(tuple_columns[i].column, tuple_columns[i].array_dimensions);
|
||||||
|
tuple_elements_types[i] = reduceNumberOfDimensions(tuple_columns[i].type, tuple_columns[i].array_dimensions);
|
||||||
|
}
|
||||||
|
|
||||||
|
auto result_column = ColumnArray::create(ColumnTuple::create(tuple_elements_columns), offsets_columns.back());
|
||||||
|
auto result_type = createNested(tuple_elements_types, tuple_names);
|
||||||
|
|
||||||
for (auto it = offsets_columns.rbegin() + 1; it != offsets_columns.rend(); ++it)
|
for (auto it = offsets_columns.rbegin() + 1; it != offsets_columns.rend(); ++it)
|
||||||
{
|
{
|
||||||
@ -482,16 +499,27 @@ std::pair<ColumnPtr, DataTypePtr> createTypeFromNode(const Node * node)
|
|||||||
result_type = std::make_shared<DataTypeArray>(result_type);
|
result_type = std::make_shared<DataTypeArray>(result_type);
|
||||||
}
|
}
|
||||||
|
|
||||||
return {result_column, result_type};
|
return {result_column, result_type, tuple_columns[0].array_dimensions};
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
auto [tuple_names, tuple_columns, tuple_types] = collect_tuple_elemets(node->children);
|
auto [tuple_names, tuple_columns] = collect_tuple_elemets(node->children);
|
||||||
|
|
||||||
auto result_column = ColumnTuple::create(tuple_columns);
|
size_t num_elements = tuple_columns.size();
|
||||||
auto result_type = std::make_shared<DataTypeTuple>(tuple_types, tuple_names);
|
Columns tuple_elements_columns(num_elements);
|
||||||
|
DataTypes tuple_elements_types(num_elements);
|
||||||
|
|
||||||
return {result_column, result_type};
|
for (size_t i = 0; i < tuple_columns.size(); ++i)
|
||||||
|
{
|
||||||
|
assert(tuple_columns[i].array_dimensions == tuple_columns[0].array_dimensions);
|
||||||
|
tuple_elements_columns[i] = tuple_columns[i].column;
|
||||||
|
tuple_elements_types[i] = tuple_columns[i].type;
|
||||||
|
}
|
||||||
|
|
||||||
|
auto result_column = ColumnTuple::create(tuple_elements_columns);
|
||||||
|
auto result_type = std::make_shared<DataTypeTuple>(tuple_elements_types, tuple_names);
|
||||||
|
|
||||||
|
return {result_column, result_type, tuple_columns[0].array_dimensions};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -499,11 +527,13 @@ std::pair<ColumnPtr, DataTypePtr> createTypeFromNode(const Node * node)
|
|||||||
|
|
||||||
std::pair<PathsInData, DataTypes> flattenTuple(const DataTypePtr & type)
|
std::pair<PathsInData, DataTypes> flattenTuple(const DataTypePtr & type)
|
||||||
{
|
{
|
||||||
PathsInData new_paths;
|
std::vector<PathInData::Parts> new_path_parts;
|
||||||
DataTypes new_types;
|
DataTypes new_types;
|
||||||
PathInDataBuilder builder;
|
PathInDataBuilder builder;
|
||||||
|
|
||||||
flattenTupleImpl(builder, type, 0, new_paths, new_types);
|
flattenTupleImpl(builder, type, new_path_parts, new_types);
|
||||||
|
|
||||||
|
PathsInData new_paths(new_path_parts.begin(), new_path_parts.end());
|
||||||
return {new_paths, new_types};
|
return {new_paths, new_types};
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -546,15 +576,7 @@ std::pair<ColumnPtr, DataTypePtr> unflattenTuple(
|
|||||||
auto type = tuple_types[i];
|
auto type = tuple_types[i];
|
||||||
|
|
||||||
const auto & parts = paths[i].getParts();
|
const auto & parts = paths[i].getParts();
|
||||||
|
|
||||||
size_t num_parts = parts.size();
|
size_t num_parts = parts.size();
|
||||||
size_t nested_level = std::count_if(parts.begin(), parts.end(), [](const auto & part) { return part.is_nested; });
|
|
||||||
size_t array_level = getNumberOfDimensions(*type);
|
|
||||||
|
|
||||||
if (array_level < nested_level)
|
|
||||||
throw Exception(ErrorCodes::LOGICAL_ERROR,
|
|
||||||
"Number of dimensions ({}) is less than number Nested types ({}) for path {}",
|
|
||||||
array_level, nested_level, paths[i].getPath());
|
|
||||||
|
|
||||||
size_t pos = 0;
|
size_t pos = 0;
|
||||||
tree.add(paths[i], [&](Node::Kind kind, bool exists) -> std::shared_ptr<Node>
|
tree.add(paths[i], [&](Node::Kind kind, bool exists) -> std::shared_ptr<Node>
|
||||||
@ -564,27 +586,13 @@ std::pair<ColumnPtr, DataTypePtr> unflattenTuple(
|
|||||||
"Not enough name parts for path {}. Expected at least {}, got {}",
|
"Not enough name parts for path {}. Expected at least {}, got {}",
|
||||||
paths[i].getPath(), pos + 1, num_parts);
|
paths[i].getPath(), pos + 1, num_parts);
|
||||||
|
|
||||||
ColumnWithTypeAndDimensions current_column;
|
size_t array_dimensions = kind == Node::NESTED ? 1 : parts[pos].anonymous_array_level;
|
||||||
if (kind == Node::NESTED)
|
ColumnWithTypeAndDimensions current_column{column, type, array_dimensions};
|
||||||
|
|
||||||
|
if (array_dimensions)
|
||||||
{
|
{
|
||||||
assert(parts[pos].is_nested);
|
type = reduceNumberOfDimensions(type, array_dimensions);
|
||||||
|
column = reduceNumberOfDimensions(column, array_dimensions);
|
||||||
size_t dimensions_to_reduce = array_level - nested_level + 1;
|
|
||||||
--nested_level;
|
|
||||||
|
|
||||||
current_column = ColumnWithTypeAndDimensions{column, type, dimensions_to_reduce};
|
|
||||||
|
|
||||||
if (dimensions_to_reduce)
|
|
||||||
{
|
|
||||||
type = reduceNumberOfDimensions(type, dimensions_to_reduce);
|
|
||||||
column = reduceNumberOfDimensions(column, dimensions_to_reduce);
|
|
||||||
}
|
|
||||||
|
|
||||||
array_level -= dimensions_to_reduce;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
current_column = ColumnWithTypeAndDimensions{column, type, 0};
|
|
||||||
}
|
}
|
||||||
|
|
||||||
++pos;
|
++pos;
|
||||||
@ -597,7 +605,8 @@ std::pair<ColumnPtr, DataTypePtr> unflattenTuple(
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
return createTypeFromNode(tree.getRoot());
|
const auto & [column, type, _] = createTypeFromNode(tree.getRoot());
|
||||||
|
return std::make_pair(std::move(column), std::move(type));
|
||||||
}
|
}
|
||||||
|
|
||||||
static void addConstantToWithClause(const ASTPtr & query, const String & column_name, const DataTypePtr & data_type)
|
static void addConstantToWithClause(const ASTPtr & query, const String & column_name, const DataTypePtr & data_type)
|
||||||
|
@ -153,13 +153,11 @@ private:
|
|||||||
paths.reserve(paths.size() + arrays_by_path.size());
|
paths.reserve(paths.size() + arrays_by_path.size());
|
||||||
values.reserve(values.size() + arrays_by_path.size());
|
values.reserve(values.size() + arrays_by_path.size());
|
||||||
|
|
||||||
bool is_nested = arrays_by_path.size() > 1 || !arrays_by_path.begin()->getMapped().first.empty();
|
|
||||||
|
|
||||||
for (auto && [_, value] : arrays_by_path)
|
for (auto && [_, value] : arrays_by_path)
|
||||||
{
|
{
|
||||||
auto && [path, path_array] = value;
|
auto && [path, path_array] = value;
|
||||||
|
|
||||||
paths.push_back(builder.append(path, is_nested).getParts());
|
paths.push_back(builder.append(path, true).getParts());
|
||||||
values.push_back(std::move(path_array));
|
values.push_back(std::move(path_array));
|
||||||
|
|
||||||
builder.popBack(path.size());
|
builder.popBack(path.size());
|
||||||
|
@ -26,13 +26,13 @@ PathInData::PathInData(std::string_view path_)
|
|||||||
if (*it == '.')
|
if (*it == '.')
|
||||||
{
|
{
|
||||||
size_t size = static_cast<size_t>(it - begin);
|
size_t size = static_cast<size_t>(it - begin);
|
||||||
parts.emplace_back(std::string_view{begin, size}, false);
|
parts.emplace_back(std::string_view{begin, size}, false, 0);
|
||||||
begin = it + 1;
|
begin = it + 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t size = static_cast<size_t>(end - begin);
|
size_t size = static_cast<size_t>(end - begin);
|
||||||
parts.emplace_back(std::string_view{begin, size}, false);
|
parts.emplace_back(std::string_view{begin, size}, false, 0.);
|
||||||
}
|
}
|
||||||
|
|
||||||
PathInData::PathInData(const Parts & parts_)
|
PathInData::PathInData(const Parts & parts_)
|
||||||
@ -65,6 +65,7 @@ UInt128 PathInData::getPartsHash(const Parts & parts_)
|
|||||||
{
|
{
|
||||||
hash.update(part.key.data(), part.key.length());
|
hash.update(part.key.data(), part.key.length());
|
||||||
hash.update(part.is_nested);
|
hash.update(part.is_nested);
|
||||||
|
hash.update(part.anonymous_array_level);
|
||||||
}
|
}
|
||||||
|
|
||||||
UInt128 res;
|
UInt128 res;
|
||||||
@ -78,7 +79,8 @@ void PathInData::writeBinary(WriteBuffer & out) const
|
|||||||
for (const auto & part : parts)
|
for (const auto & part : parts)
|
||||||
{
|
{
|
||||||
writeStringBinary(part.key, out);
|
writeStringBinary(part.key, out);
|
||||||
writeVarUInt(static_cast<UInt8>(part.is_nested) , out);
|
writeVarUInt(part.is_nested, out);
|
||||||
|
writeVarUInt(part.anonymous_array_level, out);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -93,11 +95,14 @@ void PathInData::readBinary(ReadBuffer & in)
|
|||||||
|
|
||||||
for (size_t i = 0; i < num_parts; ++i)
|
for (size_t i = 0; i < num_parts; ++i)
|
||||||
{
|
{
|
||||||
UInt8 is_nested;
|
bool is_nested;
|
||||||
|
UInt8 anonymous_array_level;
|
||||||
|
|
||||||
auto ref = readStringBinaryInto(arena, in);
|
auto ref = readStringBinaryInto(arena, in);
|
||||||
readVarUInt(is_nested, in);
|
readVarUInt(is_nested, in);
|
||||||
|
readVarUInt(anonymous_array_level, in);
|
||||||
|
|
||||||
temp_parts.emplace_back(static_cast<std::string_view>(ref), is_nested);
|
temp_parts.emplace_back(static_cast<std::string_view>(ref), is_nested, anonymous_array_level);
|
||||||
}
|
}
|
||||||
|
|
||||||
path = buildPath(temp_parts);
|
path = buildPath(temp_parts);
|
||||||
@ -122,16 +127,16 @@ String PathInData::buildPath(const Parts & other_parts)
|
|||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
PathInData::Parts PathInData::buildParts(const String & path, const Parts & other_parts)
|
PathInData::Parts PathInData::buildParts(const String & other_path, const Parts & other_parts)
|
||||||
{
|
{
|
||||||
if (other_parts.empty())
|
if (other_parts.empty())
|
||||||
return {};
|
return {};
|
||||||
|
|
||||||
Parts res;
|
Parts res;
|
||||||
const char * begin = path.data();
|
const char * begin = other_path.data();
|
||||||
for (const auto & part : other_parts)
|
for (const auto & part : other_parts)
|
||||||
{
|
{
|
||||||
res.emplace_back(std::string_view{begin, part.key.length()}, part.is_nested);
|
res.emplace_back(std::string_view{begin, part.key.length()}, part.is_nested, part.anonymous_array_level);
|
||||||
begin += part.key.length() + 1;
|
begin += part.key.length() + 1;
|
||||||
}
|
}
|
||||||
return res;
|
return res;
|
||||||
@ -139,24 +144,43 @@ PathInData::Parts PathInData::buildParts(const String & path, const Parts & othe
|
|||||||
|
|
||||||
size_t PathInData::Hash::operator()(const PathInData & value) const
|
size_t PathInData::Hash::operator()(const PathInData & value) const
|
||||||
{
|
{
|
||||||
return std::hash<String>{}(value.path);
|
auto hash = getPartsHash(value.parts);
|
||||||
|
return hash.items[0] ^ hash.items[1];
|
||||||
}
|
}
|
||||||
|
|
||||||
PathInDataBuilder & PathInDataBuilder::append(std::string_view key, bool is_nested)
|
PathInDataBuilder & PathInDataBuilder::append(std::string_view key, bool is_array)
|
||||||
{
|
{
|
||||||
if (!parts.empty())
|
if (parts.empty())
|
||||||
parts.back().is_nested = is_nested;
|
current_anonymous_array_level += is_array;
|
||||||
|
|
||||||
|
if (!key.empty())
|
||||||
|
{
|
||||||
|
if (!parts.empty())
|
||||||
|
parts.back().is_nested = is_array;
|
||||||
|
|
||||||
|
parts.emplace_back(key, false, current_anonymous_array_level);
|
||||||
|
current_anonymous_array_level = 0;
|
||||||
|
}
|
||||||
|
|
||||||
parts.emplace_back(key, false);
|
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
|
||||||
PathInDataBuilder & PathInDataBuilder::append(const PathInData::Parts & path, bool is_nested)
|
PathInDataBuilder & PathInDataBuilder::append(const PathInData::Parts & path, bool is_array)
|
||||||
{
|
{
|
||||||
if (!parts.empty())
|
if (parts.empty())
|
||||||
parts.back().is_nested = is_nested;
|
current_anonymous_array_level += is_array;
|
||||||
|
|
||||||
|
if (!path.empty())
|
||||||
|
{
|
||||||
|
if (!parts.empty())
|
||||||
|
parts.back().is_nested = is_array;
|
||||||
|
|
||||||
|
auto it = parts.insert(parts.end(), path.begin(), path.end());
|
||||||
|
for (; it != parts.end(); ++it)
|
||||||
|
it->anonymous_array_level += current_anonymous_array_level;
|
||||||
|
current_anonymous_array_level = 0;
|
||||||
|
}
|
||||||
|
|
||||||
parts.insert(parts.end(), path.begin(), path.end());
|
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -16,13 +16,16 @@ public:
|
|||||||
struct Part
|
struct Part
|
||||||
{
|
{
|
||||||
Part() = default;
|
Part() = default;
|
||||||
Part(std::string_view key_, bool is_nested_)
|
Part(std::string_view key_, bool is_nested_, UInt8 anonymous_array_level_)
|
||||||
: key(key_), is_nested(is_nested_)
|
: key(key_), is_nested(is_nested_), anonymous_array_level(anonymous_array_level_)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string_view key;
|
std::string_view key;
|
||||||
bool is_nested = false;
|
bool is_nested = false;
|
||||||
|
UInt8 anonymous_array_level = 0;
|
||||||
|
|
||||||
|
bool operator==(const Part & other) const = default;
|
||||||
};
|
};
|
||||||
|
|
||||||
using Parts = std::vector<Part>;
|
using Parts = std::vector<Part>;
|
||||||
@ -47,13 +50,12 @@ public:
|
|||||||
void writeBinary(WriteBuffer & out) const;
|
void writeBinary(WriteBuffer & out) const;
|
||||||
void readBinary(ReadBuffer & in);
|
void readBinary(ReadBuffer & in);
|
||||||
|
|
||||||
bool operator==(const PathInData & other) const { return path == other.path; }
|
bool operator==(const PathInData & other) const { return parts == other.parts; }
|
||||||
bool operator!=(const PathInData & other) const { return !(*this == other); }
|
|
||||||
struct Hash { size_t operator()(const PathInData & value) const; };
|
struct Hash { size_t operator()(const PathInData & value) const; };
|
||||||
|
|
||||||
private:
|
private:
|
||||||
static String buildPath(const Parts & other_parts);
|
static String buildPath(const Parts & other_parts);
|
||||||
static Parts buildParts(const String & path, const Parts & other_parts);
|
static Parts buildParts(const String & other_path, const Parts & other_parts);
|
||||||
|
|
||||||
String path;
|
String path;
|
||||||
Parts parts;
|
Parts parts;
|
||||||
@ -64,14 +66,15 @@ class PathInDataBuilder
|
|||||||
public:
|
public:
|
||||||
const PathInData::Parts & getParts() const { return parts; }
|
const PathInData::Parts & getParts() const { return parts; }
|
||||||
|
|
||||||
PathInDataBuilder & append(std::string_view key, bool is_nested);
|
PathInDataBuilder & append(std::string_view key, bool is_array);
|
||||||
PathInDataBuilder & append(const PathInData::Parts & path, bool is_nested);
|
PathInDataBuilder & append(const PathInData::Parts & path, bool is_array);
|
||||||
|
|
||||||
void popBack();
|
void popBack();
|
||||||
void popBack(size_t n);
|
void popBack(size_t n);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
PathInData::Parts parts;
|
PathInData::Parts parts;
|
||||||
|
size_t current_anonymous_array_level = 0;
|
||||||
};
|
};
|
||||||
|
|
||||||
using PathsInData = std::vector<PathInData>;
|
using PathsInData = std::vector<PathInData>;
|
||||||
|
@ -260,7 +260,7 @@ void SerializationObject<Parser>::serializeBinaryBulkWithMultipleStreams(
|
|||||||
if (auto * stream = settings.getter(settings.path))
|
if (auto * stream = settings.getter(settings.path))
|
||||||
writeVarUInt(column_object.getSubcolumns().size(), *stream);
|
writeVarUInt(column_object.getSubcolumns().size(), *stream);
|
||||||
|
|
||||||
const auto & subcolumns = column_object.getSubcolumns().getLeaves();
|
const auto & subcolumns = column_object.getSubcolumns();
|
||||||
for (const auto & entry : subcolumns)
|
for (const auto & entry : subcolumns)
|
||||||
{
|
{
|
||||||
settings.path.back() = Substream::ObjectStructure;
|
settings.path.back() = Substream::ObjectStructure;
|
||||||
|
@ -1,9 +1,11 @@
|
|||||||
#include <DataTypes/Serializations/JSONDataParser.h>
|
#include <DataTypes/Serializations/JSONDataParser.h>
|
||||||
#include <Common/JSONParsers/SimdJSONParser.h>
|
#include <Common/JSONParsers/SimdJSONParser.h>
|
||||||
#include <IO/ReadBufferFromString.h>
|
#include <IO/ReadBufferFromString.h>
|
||||||
#include <gtest/gtest.h>
|
|
||||||
#include <Common/FieldVisitorToString.h>
|
#include <Common/FieldVisitorToString.h>
|
||||||
|
|
||||||
|
#include <ostream>
|
||||||
|
#include <gtest/gtest.h>
|
||||||
|
|
||||||
#if USE_SIMDJSON
|
#if USE_SIMDJSON
|
||||||
|
|
||||||
using namespace DB;
|
using namespace DB;
|
||||||
@ -49,26 +51,32 @@ TEST(JSONDataParser, ReadJSON)
|
|||||||
|
|
||||||
struct JSONPathAndValue
|
struct JSONPathAndValue
|
||||||
{
|
{
|
||||||
String path;
|
PathInData path;
|
||||||
Field value;
|
Field value;
|
||||||
std::vector<bool> is_nested;
|
|
||||||
|
|
||||||
JSONPathAndValue(const String & path_, const Field & value_, const std::vector<bool> & is_nested_)
|
|
||||||
: path(path_), value(value_), is_nested(is_nested_)
|
|
||||||
{
|
|
||||||
}
|
|
||||||
|
|
||||||
JSONPathAndValue(const PathInData & path_, const Field & value_)
|
JSONPathAndValue(const PathInData & path_, const Field & value_)
|
||||||
: path(path_.getPath()), value(value_)
|
: path(path_), value(value_)
|
||||||
{
|
{
|
||||||
for (const auto & part : path_.getParts())
|
|
||||||
is_nested.push_back(part.is_nested);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
bool operator==(const JSONPathAndValue & other) const = default;
|
bool operator==(const JSONPathAndValue & other) const = default;
|
||||||
bool operator<(const JSONPathAndValue & other) const { return path < other.path; }
|
bool operator<(const JSONPathAndValue & other) const { return path.getPath() < other.path.getPath(); }
|
||||||
};
|
};
|
||||||
|
|
||||||
|
static std::ostream & operator<<(std::ostream & ostr, const JSONPathAndValue & path_and_value)
|
||||||
|
{
|
||||||
|
ostr << "{ PathInData{";
|
||||||
|
bool first = true;
|
||||||
|
for (const auto & part : path_and_value.path.getParts())
|
||||||
|
{
|
||||||
|
ostr << (first ? "{" : ", {") << part.key << ", " << part.is_nested << ", " << part.anonymous_array_level << "}";
|
||||||
|
first = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
ostr << "}, Field{" << applyVisitor(FieldVisitorToString(), path_and_value.value) << "} }";
|
||||||
|
return ostr;
|
||||||
|
}
|
||||||
|
|
||||||
using JSONValues = std::vector<JSONPathAndValue>;
|
using JSONValues = std::vector<JSONPathAndValue>;
|
||||||
|
|
||||||
static void check(
|
static void check(
|
||||||
@ -100,17 +108,17 @@ TEST(JSONDataParser, Parse)
|
|||||||
{
|
{
|
||||||
check(json1, "json1",
|
check(json1, "json1",
|
||||||
{
|
{
|
||||||
{"k1", 1, {false}},
|
{ PathInData{{{"k1", false, 0}}}, 1 },
|
||||||
{"k2.k3", "aa", {false, false}},
|
{ PathInData{{{"k2", false, 0}, {"k3", false, 0}}}, "aa" },
|
||||||
{"k2.k4", 2, {false, false}},
|
{ PathInData{{{"k2", false, 0}, {"k4", false, 0}}}, 2 },
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
check(json2, "json2",
|
check(json2, "json2",
|
||||||
{
|
{
|
||||||
{"k1.k2", Array{"aaa", "ddd"}, {true, false}},
|
{ PathInData{{{"k1", true, 0}, {"k2", false, 0}}}, Array{"aaa", "ddd"} },
|
||||||
{"k1.k3.k4", Array{Array{"bbb", "ccc"}, Array{"eee", "fff"}}, {true, true, false}},
|
{ PathInData{{{"k1", true, 0}, {"k3", true, 0}, {"k4", false, 0}}}, Array{Array{"bbb", "ccc"}, Array{"eee", "fff"}} },
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -134,15 +142,11 @@ TEST(JSONDataParser, Parse)
|
|||||||
}
|
}
|
||||||
]})";
|
]})";
|
||||||
|
|
||||||
Strings paths = {"k1.k2.k4", "k1.k5", "k1.k2.k3"};
|
|
||||||
|
|
||||||
auto k1k2k4 = Array{Array{3, 4}, Array{7, 8}};
|
|
||||||
|
|
||||||
check(json3, "json3",
|
check(json3, "json3",
|
||||||
{
|
{
|
||||||
{"k1.k5", Array{"foo", "bar"}, {true, false}},
|
{ PathInData{{{"k1", true, 0}, {"k5", false, 0}}}, Array{"foo", "bar"} },
|
||||||
{"k1.k2.k3", Array{Array{1, 2}, Array{5, 6}}, {true, false, false}},
|
{ PathInData{{{"k1", true, 0}, {"k2", false, 0}, {"k3", false, 0}}}, Array{Array{1, 2}, Array{5, 6}} },
|
||||||
{"k1.k2.k4", Array{Array{3, 4}, Array{7, 8}}, {true, false, false}},
|
{ PathInData{{{"k1", true, 0}, {"k2", false, 0}, {"k4", false, 0}}}, Array{Array{3, 4}, Array{7, 8}} },
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -162,15 +166,18 @@ TEST(JSONDataParser, Parse)
|
|||||||
|
|
||||||
check(json4, "json4",
|
check(json4, "json4",
|
||||||
{
|
{
|
||||||
{"k1.k5", Array{"foo", "bar"}, {true, false}},
|
{ PathInData{{{"k1", true, 0}, {"k5", false, 0}}}, Array{"foo", "bar"} },
|
||||||
{"k1.k2.k3", Array{Array{1, 2}, Array{5, 6}}, {true, true, false}},
|
{ PathInData{{{"k1", true, 0}, {"k2", true, 0}, {"k3", false, 0}}}, Array{Array{1, 2}, Array{5, 6}} },
|
||||||
{"k1.k2.k4", Array{Array{3, 4}, Array{7, 8}}, {true, true, false}},
|
{ PathInData{{{"k1", true, 0}, {"k2", true, 0}, {"k4", false, 0}}}, Array{Array{3, 4}, Array{7, 8}} },
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
const String json5 = R"({"k1": [[1, 2, 3], [4, 5], [6]]})";
|
const String json5 = R"({"k1": [[1, 2, 3], [4, 5], [6]]})";
|
||||||
check(json5, "json5", {{"k1", Array{Array{1, 2, 3}, Array{4, 5}, Array{6}}, {false}}});
|
check(json5, "json5",
|
||||||
|
{
|
||||||
|
{ PathInData{{{"k1", false, 0}}}, Array{Array{1, 2, 3}, Array{4, 5}, Array{6}} }
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
@ -182,15 +189,10 @@ TEST(JSONDataParser, Parse)
|
|||||||
]
|
]
|
||||||
})";
|
})";
|
||||||
|
|
||||||
Strings paths = {"k1.k2", "k1.k3"};
|
|
||||||
|
|
||||||
auto k1k2 = Array{Array{1, 3}, Array{5}};
|
|
||||||
auto k1k3 = Array{Array{2, 4}, Array{6}};
|
|
||||||
|
|
||||||
check(json6, "json6",
|
check(json6, "json6",
|
||||||
{
|
{
|
||||||
{"k1.k2", Array{Array{1, 3}, Array{5}}, {true, false}},
|
{ PathInData{{{"k1", true, 0}, {"k2", false, 1}}}, Array{Array{1, 3}, Array{5}} },
|
||||||
{"k1.k3", Array{Array{2, 4}, Array{6}}, {true, false}},
|
{ PathInData{{{"k1", true, 0}, {"k3", false, 1}}}, Array{Array{2, 4}, Array{6}} },
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -205,8 +207,8 @@ TEST(JSONDataParser, Parse)
|
|||||||
|
|
||||||
check(json7, "json7",
|
check(json7, "json7",
|
||||||
{
|
{
|
||||||
{"k1.k2", Array{Array{1, 3}, Array{5}}, {true, false}},
|
{ PathInData{{{"k1", true, 0}, {"k2", false, 0}}}, Array{Array{1, 3}, Array{5}} },
|
||||||
{"k1.k3", Array{Array{2, 4}, Array{6}}, {true, false}},
|
{ PathInData{{{"k1", true, 0}, {"k3", false, 0}}}, Array{Array{2, 4}, Array{6}} },
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -26,7 +26,6 @@ FROM system.parts_columns
|
|||||||
WHERE table = 't_json' AND database = currentDatabase() AND active AND column = 'data'
|
WHERE table = 't_json' AND database = currentDatabase() AND active AND column = 'data'
|
||||||
ORDER BY name;
|
ORDER BY name;
|
||||||
|
|
||||||
|
|
||||||
SELECT '============';
|
SELECT '============';
|
||||||
TRUNCATE TABLE t_json;
|
TRUNCATE TABLE t_json;
|
||||||
|
|
||||||
|
2
tests/queries/0_stateless/01825_type_json_8.reference
Normal file
2
tests/queries/0_stateless/01825_type_json_8.reference
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
([[(1,2),(3,4)],[(5,6)]]) Tuple(k1 Array(Nested(k2 Int8, k3 Int8)))
|
||||||
|
([([1,3,4,5],[6,7]),([8],[9,10,11])]) Tuple(k1 Nested(k2 Array(Int8), k3 Array(Int8)))
|
36
tests/queries/0_stateless/01825_type_json_8.sh
Normal file
36
tests/queries/0_stateless/01825_type_json_8.sh
Normal file
@ -0,0 +1,36 @@
|
|||||||
|
#!/usr/bin/env bash
# Tags: no-fasttest

# Inserts two JSON documents into a JSON column and prints the stored value
# together with the deduced type for each of them:
#   1. an array of arrays of objects      -> Array(Nested(...))
#   2. an array of objects holding arrays -> Nested(... Array(...))
# The expected output is kept in the .reference file next to this script.

CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh

# Runs a single query through the client. Whatever is redirected into the
# function call becomes the client's stdin (used as the INSERT payload below).
function run_query()
{
    # $CLICKHOUSE_CLIENT is left unquoted on purpose, exactly as in direct invocations.
    $CLICKHOUSE_CLIENT -q "$1"
}

run_query "DROP TABLE IF EXISTS t_json_8"

run_query "CREATE TABLE t_json_8 (data JSON) ENGINE = MergeTree ORDER BY tuple()"

# Case 1: nested arrays whose innermost elements are objects.
run_query "INSERT INTO t_json_8 FORMAT JSONAsObject" <<EOF
{
"k1": [
[{"k2": 1, "k3": 2}, {"k2": 3, "k3": 4}],
[{"k2": 5, "k3": 6}]
]
}
EOF

run_query "SELECT data, toTypeName(data) FROM t_json_8"
run_query "TRUNCATE TABLE t_json_8"

# Case 2: an array of objects whose fields are themselves arrays.
run_query "INSERT INTO t_json_8 FORMAT JSONAsObject" <<EOF
{
"k1": [
{"k2": [1, 3, 4, 5], "k3": [6, 7]},
{"k2": [8], "k3": [9, 10, 11]}
]
}
EOF

run_query "SELECT data, toTypeName(data) FROM t_json_8"
run_query "TRUNCATE TABLE t_json_8"

run_query "DROP TABLE t_json_8"
|
@ -7,6 +7,6 @@ Philadelphia 76ers 57
|
|||||||
Atlanta Hawks 55
|
Atlanta Hawks 55
|
||||||
Larry Bird 10
|
Larry Bird 10
|
||||||
Clyde Drexler 4
|
Clyde Drexler 4
|
||||||
Magic Johnson 3
|
|
||||||
Alvin Robertson 3
|
Alvin Robertson 3
|
||||||
Fat Lever 2
|
Magic Johnson 3
|
||||||
|
Charles Barkley 2
|
||||||
|
@ -34,7 +34,7 @@ ${CLICKHOUSE_CLIENT} -q \
|
|||||||
SELECT arrayJoin(arrayJoin(data.teams.players)) as players from nbagames \
|
SELECT arrayJoin(arrayJoin(data.teams.players)) as players from nbagames \
|
||||||
) \
|
) \
|
||||||
) \
|
) \
|
||||||
GROUP BY player ORDER BY triple_doubles DESC LIMIT 5"
|
GROUP BY player ORDER BY triple_doubles DESC, player LIMIT 5"
|
||||||
|
|
||||||
|
|
||||||
${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS nbagames"
|
${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS nbagames"
|
||||||
|
Loading…
Reference in New Issue
Block a user