Dynamic columns: fix several cases of parsing JSON

This commit is contained in:
Anton Popov 2021-09-11 03:20:54 +03:00
parent ee7c0d4cc1
commit 3bd96d709d
5 changed files with 103 additions and 14 deletions

View File

@ -13,7 +13,6 @@
#include <Interpreters/convertFieldToType.h>
#include <Common/HashTable/HashSet.h>
namespace DB
{
@ -28,25 +27,45 @@ namespace ErrorCodes
namespace
{
/// Creates an Array field nested to the requested depth that holds no scalar
/// values: every level except the innermost one contains exactly one Array,
/// and the innermost Array is empty.
/// For num_dimensions equal to 0 or 1 the result is a plain empty Array.
Array createEmptyArrayField(size_t num_dimensions)
{
    Array result;
    Array * innermost = &result;

    /// The outermost level already exists, so only (num_dimensions - 1)
    /// nested levels remain to be appended.
    for (size_t levels_left = num_dimensions; levels_left > 1; --levels_left)
    {
        innermost->push_back(Array());
        innermost = &innermost->back().get<Array &>();
    }

    return result;
}
class FieldVisitorReplaceNull : public StaticVisitor<Field>
{
public:
[[maybe_unused]] explicit FieldVisitorReplaceNull(const Field & replacement_)
[[maybe_unused]] explicit FieldVisitorReplaceNull(
const Field & replacement_, size_t num_dimensions_)
: replacement(replacement_)
, num_dimensions(num_dimensions_)
{
}
Field operator()(const Null &) const { return replacement; }
template <typename T>
Field operator()(const T & x) const
{
if constexpr (std::is_base_of_v<FieldVector, T>)
if constexpr (std::is_same_v<T, Null>)
{
return num_dimensions
? createEmptyArrayField(num_dimensions)
: replacement;
}
else if constexpr (std::is_same_v<T, Array>)
{
assert(num_dimensions > 0);
const size_t size = x.size();
T res(size);
Array res(size);
for (size_t i = 0; i < size; ++i)
res[i] = applyVisitor(*this, x[i]);
res[i] = applyVisitor(FieldVisitorReplaceNull(replacement, num_dimensions - 1), x[i]);
return res;
}
else
@ -54,7 +73,8 @@ public:
}
private:
Field replacement;
const Field & replacement;
size_t num_dimensions;
};
class FieldVisitorToNumberOfDimensions : public StaticVisitor<size_t>
@ -66,16 +86,24 @@ public:
return 1;
const size_t size = x.size();
size_t dimensions = applyVisitor(*this, x[0]);
for (size_t i = 1; i < size; ++i)
std::optional<size_t> dimensions;
for (size_t i = 0; i < size; ++i)
{
/// Do not count Nulls, because they will be replaced by default
/// values with the proper number of dimensions.
if (x[i].isNull())
continue;
size_t current_dimensions = applyVisitor(*this, x[i]);
if (current_dimensions != dimensions)
if (!dimensions)
dimensions = current_dimensions;
else if (current_dimensions != *dimensions)
throw Exception(ErrorCodes::NUMBER_OF_DIMENSIONS_MISMATHED,
"Number of dimensions mismatched among array elements");
}
return 1 + dimensions;
return 1 + dimensions.value_or(0);
}
template <typename T>
@ -239,6 +267,7 @@ void ColumnObject::Subcolumn::insert(Field field)
if (is_nullable && !base_type->isNullable())
base_type = makeNullable(base_type);
auto value_type = createArrayOfType(base_type, value_dim);
if (!is_nullable && base_type->isNullable())
{
base_type = removeNullable(base_type);
@ -248,10 +277,11 @@ void ColumnObject::Subcolumn::insert(Field field)
return;
}
field = applyVisitor(FieldVisitorReplaceNull(base_type->getDefault()), std::move(field));
value_type = createArrayOfType(base_type, value_dim);
auto default_value = value_type->getDefault();
field = applyVisitor(FieldVisitorReplaceNull(default_value, value_dim), std::move(field));
}
auto value_type = createArrayOfType(base_type, value_dim);
bool type_changed = false;
if (data.empty())

View File

@ -0,0 +1,3 @@
Tuple(key String, `out.outputs.index` Array(Array(Int32)), `out.outputs.n` Array(Array(Int8)), `out.type` Array(Int8), `out.value` Array(Int8))
v1 [0,0] [1,2] [[],[1960131]] [[],[1960131]]
v2 [1,1] [4,3] [[1881212],[]] [[1881212],[]]

View File

@ -0,0 +1,56 @@
#!/usr/bin/env bash
# Functional test: inserts JSON objects with nested arrays of objects into a
# column of type JSON, then prints the type name of the column and the values
# of several of its subcolumns.
# Resolve the directory of this script so the shared config can be sourced
# regardless of the current working directory.
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh
# Drop the table if it already exists, then create it with a single JSON column.
$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS t_json_6;"
$CLICKHOUSE_CLIENT -q"CREATE TABLE t_json_6 (data JSON) ENGINE = MergeTree ORDER BY tuple();"
# Insert two objects in JSONAsObject format:
#   - "v1": the first element of "out" has an empty "outputs" array,
#           the second element has one entry in "outputs";
#   - "v2": the first element of "out" has one entry in "outputs",
#           the second element has no "outputs" key at all.
cat <<EOF | $CLICKHOUSE_CLIENT -q "INSERT INTO t_json_6 FORMAT JSONAsObject"
{
"key": "v1",
"out": [
{
"type": 0,
"value": 1,
"outputs": []
},
{
"type": 0,
"value": 2,
"outputs": [
{
"index": 1960131,
"n": 0
}
]
}
]
}
{
"key": "v2",
"out": [
{
"type": 1,
"value": 4,
"outputs": [
{
"index": 1881212,
"n": 1
}
]
},
{
"type": 1,
"value": 3
}
]
}
EOF
# Print the distinct type name(s) of the JSON column across the inserted rows.
$CLICKHOUSE_CLIENT -q "SELECT DISTINCT toTypeName(data) FROM t_json_6;"
# Print the subcolumn values for each row, ordered by key.
# NOTE(review): data.out.outputs.index is selected twice and data.out.outputs.n
# is never read -- possibly a copy-paste slip. Changing the query would also
# change the expected output of this test; confirm the intent before fixing.
$CLICKHOUSE_CLIENT -q "SELECT data.key, data.out.type, data.out.value, data.out.outputs.index, data.out.outputs.index FROM t_json_6 ORDER BY data.key"
# Clean up the table created by this test.
$CLICKHOUSE_CLIENT -q "DROP TABLE t_json_6;"