mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-09-20 00:30:49 +00:00
Replace missed Object(Nullable(json)) subcolumns
This commit is contained in:
parent
cf8c614fed
commit
78660eb859
@ -30,6 +30,7 @@
|
||||
#include <Interpreters/getTableExpressions.h>
|
||||
#include <Interpreters/replaceAliasColumnsInQuery.h>
|
||||
#include <Interpreters/replaceForPositionalArguments.h>
|
||||
#include <Interpreters/replaceMissedSubcolumnsInQuery.h>
|
||||
|
||||
#include <Functions/UserDefined/UserDefinedSQLFunctionFactory.h>
|
||||
#include <Functions/UserDefined/UserDefinedSQLFunctionVisitor.h>
|
||||
@ -48,6 +49,7 @@
|
||||
|
||||
#include <DataTypes/NestedUtils.h>
|
||||
#include <DataTypes/DataTypeNullable.h>
|
||||
#include <DataTypes/DataTypeObject.h>
|
||||
#include <DataTypes/DataTypeLowCardinality.h>
|
||||
|
||||
#include <IO/WriteHelpers.h>
|
||||
@ -948,6 +950,10 @@ void TreeRewriterResult::collectSourceColumns(bool add_special)
|
||||
source_columns.swap(columns_from_storage);
|
||||
else
|
||||
source_columns.insert(source_columns.end(), columns_from_storage.begin(), columns_from_storage.end());
|
||||
|
||||
auto metadata_snapshot = storage->getInMemoryMetadataPtr();
|
||||
auto metadata_column_descriptions = metadata_snapshot->getColumns();
|
||||
source_columns_ordinary = metadata_column_descriptions.getOrdinary();
|
||||
}
|
||||
|
||||
source_columns_set = removeDuplicateColumns(source_columns);
|
||||
@ -1117,6 +1123,33 @@ bool TreeRewriterResult::collectUsedColumns(const ASTPtr & query, bool is_select
|
||||
}
|
||||
}
|
||||
|
||||
/// Collect missed object subcolumns
|
||||
if (!unknown_required_source_columns.empty())
|
||||
{
|
||||
for (const NameAndTypePair & pair : source_columns_ordinary)
|
||||
{
|
||||
for (auto it = unknown_required_source_columns.begin(); it != unknown_required_source_columns.end();)
|
||||
{
|
||||
size_t object_pos = it->find('.');
|
||||
if (object_pos != std::string::npos)
|
||||
{
|
||||
String object_name = it->substr(0, object_pos);
|
||||
if (pair.type->getTypeId() == TypeIndex::Object)
|
||||
{
|
||||
const auto * object_type = typeid_cast<const DataTypeObject *>(pair.type.get());
|
||||
if (object_type->getSchemaFormat() == "json" && object_type->hasNullableSubcolumns())
|
||||
{
|
||||
missed_subcolumns.insert(*it);
|
||||
it = unknown_required_source_columns.erase(it);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
++it;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!unknown_required_source_columns.empty())
|
||||
{
|
||||
constexpr auto format_string = "Missing columns: {} while processing query: '{}', required columns:{}{}";
|
||||
@ -1301,6 +1334,13 @@ TreeRewriterResultPtr TreeRewriter::analyzeSelect(
|
||||
|
||||
result.collectUsedColumns(query, true, settings.query_plan_optimize_primary_key);
|
||||
|
||||
if (!result.missed_subcolumns.empty())
|
||||
{
|
||||
for (const String & column_name : result.missed_subcolumns)
|
||||
replaceMissedSubcolumnsInQuery(query, column_name);
|
||||
result.missed_subcolumns.clear();
|
||||
}
|
||||
|
||||
result.required_source_columns_before_expanding_alias_columns = result.required_source_columns.getNames();
|
||||
|
||||
/// rewrite filters for select query, must go after getArrayJoinedColumns
|
||||
@ -1399,6 +1439,14 @@ TreeRewriterResultPtr TreeRewriter::analyze(
|
||||
bool is_ok = result.collectUsedColumns(query, false, settings.query_plan_optimize_primary_key, no_throw);
|
||||
if (!is_ok)
|
||||
return {};
|
||||
|
||||
if (!result.missed_subcolumns.empty())
|
||||
{
|
||||
for (const String & column_name : result.missed_subcolumns)
|
||||
replaceMissedSubcolumnsInQuery(query, column_name);
|
||||
result.missed_subcolumns.clear();
|
||||
}
|
||||
|
||||
return std::make_shared<const TreeRewriterResult>(result);
|
||||
}
|
||||
|
||||
|
@ -36,6 +36,11 @@ struct TreeRewriterResult
|
||||
/// Same as above but also record alias columns which are expanded. This is for RBAC access check.
|
||||
Names required_source_columns_before_expanding_alias_columns;
|
||||
|
||||
/// Ordinary source columns in which Object columns are kept as is (not expanded). This is for distinguishing JSON and Tuple types.
|
||||
NamesAndTypesList source_columns_ordinary;
|
||||
|
||||
NameSet missed_subcolumns;
|
||||
|
||||
/// Set of alias columns that are expanded to their alias expressions. We still need the original columns to check access permission.
|
||||
NameSet expanded_aliases;
|
||||
|
||||
|
70
src/Interpreters/replaceMissedSubcolumnsInQuery.cpp
Normal file
70
src/Interpreters/replaceMissedSubcolumnsInQuery.cpp
Normal file
@ -0,0 +1,70 @@
|
||||
#include <Interpreters/replaceMissedSubcolumnsInQuery.h>
|
||||
#include <Parsers/ASTFunction.h>
|
||||
#include <Parsers/ASTIdentifier.h>
|
||||
#include <Parsers/ASTLiteral.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/// Walks the expression rooted at `ast` and substitutes every identifier whose
/// column name equals `column_name` with a NULL literal (a default-constructed Field).
///
/// - An identifier node is replaced in place (through the `ast` reference) when it matches.
/// - For a function node only its `arguments` list is visited.
/// - For any other node all children are visited.
///
/// Returns true if at least one identifier was replaced. No alias is set on the
/// inserted literal; preserving the outer name is left to the caller.
bool replaceMissedSubcolumnsInFunction(ASTPtr & ast, const String & column_name)
{
    if (const auto * ident = ast->as<ASTIdentifier>())
    {
        if (ident->getColumnName() != column_name)
            return false;

        ast = std::make_shared<ASTLiteral>(Field());
        return true;
    }

    bool replaced_any = false;

    if (auto * function = ast->as<ASTFunction>())
    {
        /// A function without an argument list has nothing to substitute.
        if (!function->arguments)
            return false;

        /// Every argument is visited (no short-circuit): several arguments may reference the column.
        for (auto & argument : function->arguments->children)
            replaced_any |= replaceMissedSubcolumnsInFunction(argument, column_name);

        return replaced_any;
    }

    for (auto & child : ast->children)
        replaced_any |= replaceMissedSubcolumnsInFunction(child, column_name);

    return replaced_any;
}
|
||||
|
||||
/// Replaces references to the missed subcolumn `column_name` inside `ast` with NULL literals
/// while keeping the resulting expression names the same as before the replacement.
///
/// - A matching identifier becomes a NULL literal aliased to the identifier's alias or column name.
/// - A function is processed by replaceMissedSubcolumnsInFunction; if anything inside it was
///   replaced, the function is aliased to the name it had before the replacement.
/// - Any other node is traversed recursively through its children.
void replaceMissedSubcolumnsInQuery(ASTPtr & ast, const String & column_name)
{
    if (const auto * ident = ast->as<ASTIdentifier>())
    {
        if (ident->getColumnName() == column_name)
        {
            auto null_literal = std::make_shared<ASTLiteral>(Field());
            null_literal->setAlias(ident->getAliasOrColumnName());
            ast = null_literal;
        }
        return;
    }

    if (auto * function = ast->as<ASTFunction>())
    {
        /// The name must be captured before the arguments are rewritten,
        /// otherwise it would be computed from the NULL literals.
        const String name_before_replacement = function->getAliasOrColumnName();
        if (replaceMissedSubcolumnsInFunction(ast, column_name))
            ast->setAlias(name_before_replacement);
        return;
    }

    for (auto & child : ast->children)
        replaceMissedSubcolumnsInQuery(child, column_name);
}
|
||||
|
||||
}
|
20
src/Interpreters/replaceMissedSubcolumnsInQuery.h
Normal file
20
src/Interpreters/replaceMissedSubcolumnsInQuery.h
Normal file
@ -0,0 +1,20 @@
|
||||
#pragma once
|
||||
|
||||
#include <Core/Names.h>
|
||||
#include <Parsers/IAST_fwd.h>
|
||||
#include <base/types.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/// Replace missed Object(Nullable('json')) subcolumns with NULL in the query.
|
||||
void replaceMissedSubcolumnsInQuery(ASTPtr & ast, const String & column_name);
|
||||
|
||||
/// Return true if the ASTFunction has missed object subcolumns.
|
||||
/// ASTFunction is resolved separately because otherwise we may lose the column name of missed object subcolumns.
|
||||
/// For example, if `b.d` is a missed object subcolumn, the column name of `b.d * 2 + 3` will be `plus(multiply(NULL, 2), 3)`,
|
||||
/// while we want to keep it as `plus(multiply(b.d, 2), 3)`.
|
||||
bool replaceMissedSubcolumnsInFunction(ASTPtr & ast, const String & column_name);
|
||||
|
||||
}
|
||||
|
@ -0,0 +1,7 @@
|
||||
4 1
|
||||
{"id":"1","n":"aaa","obj.k4":null}
|
||||
{"id":"2","n":"bbb","obj.k4":null}
|
||||
{"id":"3","n":"ccc","obj.k4":null}
|
||||
{"id":"4","n":"ddd","obj.k4":null}
|
||||
4 1
|
||||
4 1
|
29
tests/queries/0_stateless/02886_missed_json_subcolumns.sql
Normal file
29
tests/queries/0_stateless/02886_missed_json_subcolumns.sql
Normal file
@ -0,0 +1,29 @@
|
||||
-- Checks that subcolumns missing from an Object(Nullable('json')) column are treated as NULL
-- in SELECT, ALTER ... DELETE and lightweight DELETE instead of raising a "Missing columns" error.

-- Drop the table this test actually creates, so a leftover from an aborted run cannot break CREATE TABLE.
DROP TABLE IF EXISTS t_missed_subcolumns;

SET allow_experimental_object_type = 1;

CREATE TABLE t_missed_subcolumns (id UInt64, n String, obj Object(Nullable('json')))
ENGINE = MergeTree ORDER BY id;

INSERT INTO t_missed_subcolumns VALUES (1, 'aaa', '{"k1": {"k2": "foo"}, "k3": 5}');
INSERT INTO t_missed_subcolumns VALUES (2, 'bbb', '{"k1": {"k2": "fee"}, "k3": 4}');
INSERT INTO t_missed_subcolumns VALUES (3, 'ccc', '{"k1": {"k2": "foo", "k4": "baz"}, "k3": 4}');
INSERT INTO t_missed_subcolumns VALUES (4, 'ddd', '{"k1": {"k2": "foo"}, "k3": 4}');

OPTIMIZE TABLE t_missed_subcolumns FINAL;

SELECT count(), min(id) FROM t_missed_subcolumns;

-- `obj.k4` and `obj.k1.k3` do not exist in the data: the filters compare against NULL and match no rows.
SELECT * FROM t_missed_subcolumns WHERE obj.k4 = 5 ORDER BY id FORMAT JSONEachRow;

SELECT * FROM t_missed_subcolumns WHERE obj.k1.k3 = 'fee' ORDER BY id FORMAT JSONEachRow;

-- A missed subcolumn in the select list keeps its original name and yields null.
SELECT id, n, obj.k4 FROM t_missed_subcolumns ORDER BY id FORMAT JSONEachRow;

-- Deletes filtered on missed subcolumns must not remove any rows.
ALTER TABLE t_missed_subcolumns DELETE WHERE obj.k4 = 5;
SELECT count(), min(id) FROM t_missed_subcolumns;

DELETE FROM t_missed_subcolumns WHERE obj.k1.k3 = 'fee';
SELECT count(), min(id) FROM t_missed_subcolumns;

DROP TABLE IF EXISTS t_missed_subcolumns;
|
Loading…
Reference in New Issue
Block a user