ClickHouse/src/Parsers/ASTColumnsTransformers.cpp

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

432 lines
13 KiB
C++
Raw Normal View History

2020-09-02 12:39:34 +00:00
#include <map>
2020-08-29 05:33:46 +00:00
#include "ASTColumnsTransformers.h"
#include <IO/WriteHelpers.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTIdentifier.h>
#include <Common/SipHash.h>
#include <Common/quoteString.h>
2024-01-07 22:28:08 +00:00
#include <Common/re2.h>
2020-11-09 16:05:40 +00:00
#include <IO/Operators.h>
2021-08-20 06:44:51 +00:00
#include <stack>
2020-08-29 05:33:46 +00:00
namespace DB
{
/// Error codes raised by the column transformers in this file.
namespace ErrorCodes
{
    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
    extern const int NO_SUCH_COLUMN_IN_TABLE;
    extern const int CANNOT_COMPILE_REGEXP;
}
2022-11-30 02:14:04 +00:00
void ASTColumnsTransformerList::formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const
{
for (const auto & child : children)
{
settings.ostr << ' ';
child->formatImpl(settings, state, frame);
}
}
2020-08-29 05:33:46 +00:00
/// Dispatches to the concrete transformer (APPLY, EXCEPT or REPLACE) behind `transformer`
/// and lets it rewrite `nodes` in place. Any other node type leaves `nodes` untouched.
void IASTColumnsTransformer::transform(const ASTPtr & transformer, ASTs & nodes)
{
    if (const auto * apply_transformer = transformer->as<ASTColumnsApplyTransformer>())
    {
        apply_transformer->transform(nodes);
        return;
    }

    if (const auto * except_transformer = transformer->as<ASTColumnsExceptTransformer>())
    {
        except_transformer->transform(nodes);
        return;
    }

    if (const auto * replace_transformer = transformer->as<ASTColumnsReplaceTransformer>())
        replace_transformer->transform(nodes);
}
2020-11-02 03:10:20 +00:00
void ASTColumnsApplyTransformer::formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const
2020-08-29 05:33:46 +00:00
{
settings.ostr << (settings.hilite ? hilite_keyword : "") << "APPLY" << (settings.hilite ? hilite_none : "") << " ";
if (!column_name_prefix.empty())
settings.ostr << "(";
2021-08-20 06:44:51 +00:00
if (lambda)
{
lambda->formatImpl(settings, state, frame);
}
else
{
settings.ostr << func_name;
if (parameters)
{
auto nested_frame = frame;
nested_frame.expression_list_prepend_whitespace = false;
settings.ostr << "(";
parameters->formatImpl(settings, state, nested_frame);
settings.ostr << ")";
}
2021-08-20 06:44:51 +00:00
}
if (!column_name_prefix.empty())
2020-11-09 05:58:32 +00:00
settings.ostr << ", '" << column_name_prefix << "')";
2020-08-29 05:33:46 +00:00
}
/// Rewrites every column in `nodes` in place:
///  - with a lambda, the column becomes a copy of the lambda body in which each occurrence
///    of the lambda argument is replaced by a copy of the column;
///  - otherwise the column is wrapped into `func_name(column)` with the optional `parameters`.
/// If `column_name_prefix` is set, the result is aliased as prefix + original column name.
void ASTColumnsApplyTransformer::transform(ASTs & nodes) const
{
    for (auto & column : nodes)
    {
        /// Capture the name before the column is replaced: alias first, then the short
        /// identifier name, then the generic column name.
        String name;
        auto alias = column->tryGetAlias();
        if (!alias.empty())
            name = alias;
        else if (const auto * id = column->as<ASTIdentifier>())
            name = id->shortName();
        else
            name = column->getColumnName();

        if (lambda)
        {
            auto body = lambda->as<const ASTFunction &>().arguments->children.at(1)->clone();

            if (auto body_name = tryGetIdentifierName(body); body_name && body_name == lambda_arg)
            {
                /// The body is the bare lambda argument (`x -> x`). The walk below only looks
                /// at children, so the root has to be substituted explicitly; otherwise the
                /// unbound argument identifier would be emitted instead of the column.
                body = column->clone();
            }
            else
            {
                /// Iterative walk over the body replacing argument references.
                std::stack<ASTPtr> stack;
                stack.push(body);
                while (!stack.empty())
                {
                    auto ast = stack.top();
                    stack.pop();
                    for (auto & child : ast->children)
                    {
                        if (auto arg_name = tryGetIdentifierName(child); arg_name && arg_name == lambda_arg)
                        {
                            /// Substituted subtrees are not descended into.
                            child = column->clone();
                            continue;
                        }
                        stack.push(child);
                    }
                }
            }

            column = body;
        }
        else
        {
            auto function = makeASTFunction(func_name, column);
            function->parameters = parameters;
            column = function;
        }

        if (!column_name_prefix.empty())
            column->setAlias(column_name_prefix + name);
    }
}
/// Appends the textual column name of this transformer to `ostr`:
/// `APPLY <lambda or func_name + parameters>`, wrapped as `APPLY (<...>, '<prefix>')`
/// when a column name prefix is set.
void ASTColumnsApplyTransformer::appendColumnName(WriteBuffer & ostr) const
{
    const bool has_prefix = !column_name_prefix.empty();

    writeCString("APPLY ", ostr);
    if (has_prefix)
        writeChar('(', ostr);

    if (!lambda)
    {
        writeString(func_name, ostr);
        if (parameters)
            parameters->appendColumnName(ostr);
    }
    else
    {
        lambda->appendColumnName(ostr);
    }

    if (has_prefix)
    {
        writeCString(", '", ostr);
        writeString(column_name_prefix, ostr);
        writeCString("')", ostr);
    }
}
2023-11-10 12:15:23 +00:00
/// Feeds the transformer state into `hash_state`: function name, parameters, lambda,
/// lambda argument name and column name prefix, followed by the base IAST contribution.
/// The order of updates is part of the hash and must not be changed.
void ASTColumnsApplyTransformer::updateTreeHashImpl(SipHash & hash_state, bool ignore_aliases) const
{
    /// Strings are hashed as length followed by content.
    auto hash_string = [&hash_state](const String & value)
    {
        hash_state.update(value.size());
        hash_state.update(value);
    };

    hash_string(func_name);

    if (parameters)
        parameters->updateTreeHashImpl(hash_state, ignore_aliases);

    if (lambda)
        lambda->updateTreeHashImpl(hash_state, ignore_aliases);

    hash_string(lambda_arg);
    hash_string(column_name_prefix);

    IAST::updateTreeHashImpl(hash_state, ignore_aliases);
}
2020-08-29 05:33:46 +00:00
void ASTColumnsExceptTransformer::formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const
{
2020-11-09 09:13:27 +00:00
settings.ostr << (settings.hilite ? hilite_keyword : "") << "EXCEPT" << (is_strict ? " STRICT " : " ") << (settings.hilite ? hilite_none : "");
2020-10-23 09:15:55 +00:00
if (children.size() > 1)
2020-11-09 09:13:27 +00:00
settings.ostr << "(";
2020-08-29 05:33:46 +00:00
for (ASTs::const_iterator it = children.begin(); it != children.end(); ++it)
{
if (it != children.begin())
{
settings.ostr << ", ";
}
(*it)->formatImpl(settings, state, frame);
}
2021-01-03 16:45:27 +00:00
if (!original_pattern.empty())
settings.ostr << quoteString(original_pattern);
2020-10-23 09:15:55 +00:00
if (children.size() > 1)
settings.ostr << ")";
2020-08-29 05:33:46 +00:00
}
/// Appends `EXCEPT [STRICT] <columns or 'pattern'>` to `ostr`, with the list wrapped in
/// parentheses only when there is more than one child.
void ASTColumnsExceptTransformer::appendColumnName(WriteBuffer & ostr) const
{
    writeCString("EXCEPT ", ostr);
    if (is_strict)
        writeCString("STRICT ", ostr);

    const bool parenthesized = children.size() > 1;
    if (parenthesized)
        writeChar('(', ostr);

    bool is_first = true;
    for (const auto & child : children)
    {
        if (!is_first)
            writeCString(", ", ostr);
        is_first = false;

        child->appendColumnName(ostr);
    }

    if (!original_pattern.empty())
        writeQuotedString(original_pattern, ostr);

    if (parenthesized)
        writeChar(')', ostr);
}
2023-11-10 12:15:23 +00:00
/// Feeds the strictness flag and the pattern (length, then content) into `hash_state`,
/// followed by the base IAST contribution.
void ASTColumnsExceptTransformer::updateTreeHashImpl(SipHash & hash_state, bool ignore_aliases) const
{
    hash_state.update(is_strict);

    const auto pattern_length = original_pattern.size();
    hash_state.update(pattern_length);
    hash_state.update(original_pattern);

    IAST::updateTreeHashImpl(hash_state, ignore_aliases);
}
2020-08-29 05:33:46 +00:00
/// Removes excluded columns from `nodes` in place. Only identifier nodes are candidates.
///  - Without a pattern, the children name the columns to drop; each listed name removes
///    at most one node and is then considered satisfied.
///  - With a pattern, every identifier whose short name matches the regexp is dropped.
/// In strict mode, listed names that matched nothing raise NO_SUCH_COLUMN_IN_TABLE.
void ASTColumnsExceptTransformer::transform(ASTs & nodes) const
{
    std::set<String> expected_columns;
    const bool match_by_name = original_pattern.empty();

    if (match_by_name)
    {
        for (const auto & child : children)
            expected_columns.insert(child->as<const ASTIdentifier &>().name());
    }

    auto * pos = nodes.begin();
    while (pos != nodes.end())
    {
        bool drop = false;

        if (const auto * identifier = pos->get()->as<ASTIdentifier>())
        {
            if (match_by_name)
                /// erase-by-key reports whether the name was still pending.
                drop = expected_columns.erase(identifier->shortName()) > 0;
            else
                drop = isColumnMatching(identifier->shortName());
        }

        if (drop)
            pos = nodes.erase(pos);
        else
            ++pos;
    }

    if (is_strict && !expected_columns.empty())
    {
        /// Each missing name is preceded by a space, e.g. " a b".
        String expected_columns_str;
        for (const auto & missing : expected_columns)
        {
            expected_columns_str += " ";
            expected_columns_str += missing;
        }

        throw Exception(ErrorCodes::NO_SUCH_COLUMN_IN_TABLE, "Columns transformer EXCEPT expects following column(s) :{}",
            expected_columns_str);
    }
}
2021-01-03 16:45:27 +00:00
/// Stores `pattern` and compiles it into the column matcher.
/// Throws CANNOT_COMPILE_REGEXP when RE2 rejects the pattern.
void ASTColumnsExceptTransformer::setPattern(String pattern)
{
    original_pattern = std::move(pattern);
    column_matcher = std::make_shared<RE2>(original_pattern, RE2::Quiet);

    if (column_matcher->ok())
        return;

    throw DB::Exception(DB::ErrorCodes::CANNOT_COMPILE_REGEXP, "COLUMNS pattern {} cannot be compiled: {}",
        original_pattern, column_matcher->error());
}
2022-07-14 11:20:16 +00:00
/// Returns the shared regexp matcher that setPattern() assigns.
/// NOTE(review): presumably null if setPattern() was never called - confirm against the header.
const std::shared_ptr<re2::RE2> & ASTColumnsExceptTransformer::getMatcher() const
{
return column_matcher;
}
2021-01-03 16:45:27 +00:00
/// Returns whether `column_name` matches the stored pattern via RE2::PartialMatch.
/// Dereferences the matcher unconditionally, so a pattern must have been set beforehand.
bool ASTColumnsExceptTransformer::isColumnMatching(const String & column_name) const
{
return RE2::PartialMatch(column_name, *column_matcher);
}
2020-08-29 05:33:46 +00:00
/// Formats a single replacement as `<expr> AS <name>`; the name is back-quoted if needed.
void ASTColumnsReplaceTransformer::Replacement::formatImpl(
    const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const
{
    assert(children.size() == 1);

    const auto & expression = children[0];
    expression->formatImpl(settings, state, frame);

    settings.ostr << (settings.hilite ? hilite_keyword : "") << " AS " << (settings.hilite ? hilite_none : "");
    settings.ostr << backQuoteIfNeed(name);
}
/// Appends `<expr> AS <name>` to `ostr`, using the child's own column name for the expression.
void ASTColumnsReplaceTransformer::Replacement::appendColumnName(WriteBuffer & ostr) const
{
    assert(children.size() == 1);

    const auto & expression = children[0];
    expression->appendColumnName(ostr);

    writeCString(" AS ", ostr);
    writeProbablyBackQuotedString(name, ostr);
}
2023-11-10 12:15:23 +00:00
/// Feeds the replacement name (length, then content), the replacement expression and
/// the base IAST contribution into `hash_state`, in that order.
void ASTColumnsReplaceTransformer::Replacement::updateTreeHashImpl(SipHash & hash_state, bool ignore_aliases) const
{
    assert(children.size() == 1);

    const auto name_length = name.size();
    hash_state.update(name_length);
    hash_state.update(name);

    const auto & expression = children[0];
    expression->updateTreeHashImpl(hash_state, ignore_aliases);

    IAST::updateTreeHashImpl(hash_state, ignore_aliases);
}
2020-08-29 05:33:46 +00:00
void ASTColumnsReplaceTransformer::formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const
{
2020-11-09 09:13:27 +00:00
settings.ostr << (settings.hilite ? hilite_keyword : "") << "REPLACE" << (is_strict ? " STRICT " : " ") << (settings.hilite ? hilite_none : "");
2020-10-23 09:15:55 +00:00
if (children.size() > 1)
2020-11-09 09:13:27 +00:00
settings.ostr << "(";
2020-08-29 05:33:46 +00:00
for (ASTs::const_iterator it = children.begin(); it != children.end(); ++it)
{
if (it != children.begin())
settings.ostr << ", ";
2020-08-29 05:33:46 +00:00
(*it)->formatImpl(settings, state, frame);
}
2020-10-23 09:15:55 +00:00
if (children.size() > 1)
settings.ostr << ")";
2020-08-29 05:33:46 +00:00
}
/// Appends `REPLACE [STRICT] <replacements>` to `ostr`; the comma-separated list is
/// parenthesised only when there is more than one replacement.
void ASTColumnsReplaceTransformer::appendColumnName(WriteBuffer & ostr) const
{
    writeCString("REPLACE ", ostr);
    if (is_strict)
        writeCString("STRICT ", ostr);

    const bool parenthesized = children.size() > 1;
    if (parenthesized)
        writeChar('(', ostr);

    bool is_first = true;
    for (const auto & child : children)
    {
        if (!is_first)
            writeCString(", ", ostr);
        is_first = false;

        child->appendColumnName(ostr);
    }

    if (parenthesized)
        writeChar(')', ostr);
}
2023-11-10 12:15:23 +00:00
/// Feeds the strictness flag into `hash_state`, followed by the base IAST contribution.
void ASTColumnsReplaceTransformer::updateTreeHashImpl(SipHash & hash_state, bool ignore_aliases) const
{
    const bool strict = is_strict;
    hash_state.update(strict);

    IAST::updateTreeHashImpl(hash_state, ignore_aliases);
}
2020-08-29 05:33:46 +00:00
/// Recursively walks the children of `node` and substitutes every identifier whose short
/// name equals `name` with a fresh clone of `replacement`. Non-matching identifiers are
/// left alone; identifiers are never descended into.
void ASTColumnsReplaceTransformer::replaceChildren(ASTPtr & node, const ASTPtr & replacement, const String & name)
{
    for (auto & child : node->children)
    {
        const auto * identifier = child->as<ASTIdentifier>();
        if (!identifier)
        {
            replaceChildren(child, replacement, name);
            continue;
        }

        if (identifier->shortName() == name)
            child = replacement->clone();
    }
}
/// Applies the REPLACE replacements to `nodes` in place.
///  - An identifier whose short name has a replacement becomes a copy of the replacement
///    expression, aliased with the column name.
///  - An aliased expression whose alias has a replacement becomes a copy of the replacement
///    expression in which references to that name are substituted with the old expression
///    (stripped of its alias); the result is aliased with the column name.
/// Each replacement is consumed by the first node it matches.
/// Throws ILLEGAL_TYPE_OF_ARGUMENT if the same name is replaced more than once, and
/// NO_SUCH_COLUMN_IN_TABLE in strict mode if some replacement matched no column.
void ASTColumnsReplaceTransformer::transform(ASTs & nodes) const
{
    std::map<String, ASTPtr> replace_map;
    for (const auto & replace_child : children)
    {
        auto & replacement = replace_child->as<Replacement &>();
        if (!replace_map.emplace(replacement.name, replacement.children[0]).second)
            throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
                "Expressions in columns transformer REPLACE should not contain the same replacement more than once");
    }

    for (auto & column : nodes)
    {
        if (const auto * id = column->as<ASTIdentifier>())
        {
            auto replace_it = replace_map.find(id->shortName());
            if (replace_it != replace_map.end())
            {
                /// Clone before setting the alias: the expression is owned by this transformer,
                /// and aliasing it directly would mutate the transformer's own AST from a const
                /// method and share one node between two trees.
                column = replace_it->second->clone();
                column->setAlias(replace_it->first);
                replace_map.erase(replace_it);
            }
        }
        else if (auto * ast_with_alias = dynamic_cast<ASTWithAlias *>(column.get()))
        {
            auto replace_it = replace_map.find(ast_with_alias->alias);
            if (replace_it != replace_map.end())
            {
                auto new_ast = replace_it->second->clone();
                ast_with_alias->alias = ""; // remove the old alias as it's useless after replace transformation
                replaceChildren(new_ast, column, replace_it->first);
                column = new_ast;
                column->setAlias(replace_it->first);
                replace_map.erase(replace_it);
            }
        }
    }

    if (is_strict && !replace_map.empty())
    {
        /// Whatever is left in the map did not match any column.
        String expected_columns;
        for (const auto & elem : replace_map)
        {
            if (!expected_columns.empty())
                expected_columns += ", ";
            expected_columns += elem.first;
        }

        throw Exception(ErrorCodes::NO_SUCH_COLUMN_IN_TABLE, "Columns transformer REPLACE expects following column(s) : {}",
            expected_columns);
    }
}
}