2020-09-02 12:39:34 +00:00
|
|
|
#include <map>
|
2020-08-29 05:33:46 +00:00
|
|
|
#include "ASTColumnsTransformers.h"
|
|
|
|
#include <IO/WriteHelpers.h>
|
|
|
|
#include <Parsers/ASTFunction.h>
|
|
|
|
#include <Parsers/ASTIdentifier.h>
|
|
|
|
#include <Common/SipHash.h>
|
|
|
|
#include <Common/quoteString.h>
|
2020-11-09 16:05:40 +00:00
|
|
|
#include <IO/Operators.h>
|
2021-01-03 16:45:27 +00:00
|
|
|
#include <re2/re2.h>
|
2021-08-20 06:44:51 +00:00
|
|
|
#include <stack>
|
2020-08-29 05:33:46 +00:00
|
|
|
|
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
/// Error codes thrown by the column transformers in this file (declared here as extern).
namespace ErrorCodes
{
    extern const int CANNOT_COMPILE_REGEXP;
    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
    extern const int NO_SUCH_COLUMN_IN_TABLE;
}
|
|
|
|
|
|
|
|
void IASTColumnsTransformer::transform(const ASTPtr & transformer, ASTs & nodes)
|
|
|
|
{
|
|
|
|
if (const auto * apply = transformer->as<ASTColumnsApplyTransformer>())
|
|
|
|
{
|
|
|
|
apply->transform(nodes);
|
|
|
|
}
|
|
|
|
else if (const auto * except = transformer->as<ASTColumnsExceptTransformer>())
|
|
|
|
{
|
|
|
|
except->transform(nodes);
|
|
|
|
}
|
|
|
|
else if (const auto * replace = transformer->as<ASTColumnsReplaceTransformer>())
|
|
|
|
{
|
|
|
|
replace->transform(nodes);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-11-02 03:10:20 +00:00
|
|
|
void ASTColumnsApplyTransformer::formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const
|
2020-08-29 05:33:46 +00:00
|
|
|
{
|
2020-10-26 03:27:55 +00:00
|
|
|
settings.ostr << (settings.hilite ? hilite_keyword : "") << "APPLY" << (settings.hilite ? hilite_none : "") << " ";
|
|
|
|
|
|
|
|
if (!column_name_prefix.empty())
|
2020-11-06 04:02:45 +00:00
|
|
|
settings.ostr << "(";
|
|
|
|
|
2021-08-20 06:44:51 +00:00
|
|
|
if (lambda)
|
|
|
|
{
|
|
|
|
lambda->formatImpl(settings, state, frame);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
settings.ostr << func_name;
|
|
|
|
|
|
|
|
if (parameters)
|
|
|
|
parameters->formatImpl(settings, state, frame);
|
|
|
|
}
|
2020-11-06 04:02:45 +00:00
|
|
|
|
|
|
|
if (!column_name_prefix.empty())
|
2020-11-09 05:58:32 +00:00
|
|
|
settings.ostr << ", '" << column_name_prefix << "')";
|
2020-08-29 05:33:46 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/// Replaces every node in `nodes` with the result of applying this transformer to it:
///  - with a lambda: a clone of the lambda body in which each identifier equal to `lambda_arg`
///    is substituted by a clone of the column;
///  - otherwise: a call of `func_name` with the column as the only argument and `parameters` attached.
/// If `column_name_prefix` is not empty, the result gets the alias `<prefix><original name>`.
void ASTColumnsApplyTransformer::transform(ASTs & nodes) const
{
    for (auto & column : nodes)
    {
        /// Name of the column before it is rewritten: alias first, then the identifier's short name,
        /// then the generic column name.
        String name;
        auto alias = column->tryGetAlias();
        if (!alias.empty())
            name = alias;
        else if (const auto * id = column->as<ASTIdentifier>())
            name = id->shortName();
        else
            name = column->getColumnName();

        if (lambda)
        {
            auto body = lambda->as<const ASTFunction &>().arguments->children.at(1)->clone();

            if (auto body_name = tryGetIdentifierName(body); body_name && body_name == lambda_arg)
            {
                /// The body is the bare lambda argument (e.g. `x -> x`). The traversal below only looks
                /// at children, so the root has to be substituted explicitly.
                body = column->clone();
            }
            else
            {
                /// Iterative depth-first traversal; substituted nodes are not descended into.
                std::stack<ASTPtr> stack;
                stack.push(body);
                while (!stack.empty())
                {
                    auto ast = stack.top();
                    stack.pop();
                    for (auto & child : ast->children)
                    {
                        if (auto arg_name = tryGetIdentifierName(child); arg_name && arg_name == lambda_arg)
                        {
                            child = column->clone();
                            continue;
                        }
                        stack.push(child);
                    }
                }
            }

            column = body;
        }
        else
        {
            auto function = makeASTFunction(func_name, column);
            function->parameters = parameters;
            column = function;
        }

        if (!column_name_prefix.empty())
            column->setAlias(column_name_prefix + name);
    }
}
|
|
|
|
|
2022-04-19 17:22:04 +00:00
|
|
|
/// Appends the textual name of this transformer to `ostr`:
/// `APPLY <lambda or func_name[parameters]>`, or `APPLY (<...>, '<prefix>')` when a prefix is set.
void ASTColumnsApplyTransformer::appendColumnName(WriteBuffer & ostr) const
{
    const bool with_prefix = !column_name_prefix.empty();

    writeCString("APPLY ", ostr);
    if (with_prefix)
        writeChar('(', ostr);

    if (!lambda)
    {
        writeString(func_name, ostr);

        if (parameters)
            parameters->appendColumnName(ostr);
    }
    else
    {
        lambda->appendColumnName(ostr);
    }

    if (!with_prefix)
        return;

    writeCString(", '", ostr);
    writeString(column_name_prefix, ostr);
    writeCString("')", ostr);
}
|
|
|
|
|
|
|
|
/// Feeds the function name, parameters, lambda, lambda argument name and column name prefix
/// into `hash_state`, then the base IAST part. Strings are preceded by their length.
void ASTColumnsApplyTransformer::updateTreeHashImpl(SipHash & hash_state) const
{
    const auto hash_with_size = [&hash_state](const auto & value)
    {
        hash_state.update(value.size());
        hash_state.update(value);
    };

    hash_with_size(func_name);

    if (parameters)
        parameters->updateTreeHashImpl(hash_state);

    if (lambda)
        lambda->updateTreeHashImpl(hash_state);

    hash_with_size(lambda_arg);
    hash_with_size(column_name_prefix);

    IAST::updateTreeHashImpl(hash_state);
}
|
|
|
|
|
2020-08-29 05:33:46 +00:00
|
|
|
void ASTColumnsExceptTransformer::formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const
|
|
|
|
{
|
2020-11-09 09:13:27 +00:00
|
|
|
settings.ostr << (settings.hilite ? hilite_keyword : "") << "EXCEPT" << (is_strict ? " STRICT " : " ") << (settings.hilite ? hilite_none : "");
|
2020-10-23 09:15:55 +00:00
|
|
|
|
|
|
|
if (children.size() > 1)
|
2020-11-09 09:13:27 +00:00
|
|
|
settings.ostr << "(";
|
2020-08-29 05:33:46 +00:00
|
|
|
|
|
|
|
for (ASTs::const_iterator it = children.begin(); it != children.end(); ++it)
|
|
|
|
{
|
|
|
|
if (it != children.begin())
|
|
|
|
{
|
|
|
|
settings.ostr << ", ";
|
|
|
|
}
|
|
|
|
(*it)->formatImpl(settings, state, frame);
|
|
|
|
}
|
|
|
|
|
2021-01-03 16:45:27 +00:00
|
|
|
if (!original_pattern.empty())
|
|
|
|
settings.ostr << quoteString(original_pattern);
|
|
|
|
|
2020-10-23 09:15:55 +00:00
|
|
|
if (children.size() > 1)
|
|
|
|
settings.ostr << ")";
|
2020-08-29 05:33:46 +00:00
|
|
|
}
|
|
|
|
|
2022-04-19 17:22:04 +00:00
|
|
|
/// Appends `EXCEPT [STRICT ]` plus the comma-separated child names and, if set, the quoted pattern
/// to `ostr`. Parentheses are added only when there is more than one child.
void ASTColumnsExceptTransformer::appendColumnName(WriteBuffer & ostr) const
{
    const bool parenthesized = children.size() > 1;

    writeCString("EXCEPT ", ostr);
    if (is_strict)
        writeCString("STRICT ", ostr);

    if (parenthesized)
        writeChar('(', ostr);

    bool is_first = true;
    for (const auto & child : children)
    {
        if (!is_first)
            writeCString(", ", ostr);
        is_first = false;

        child->appendColumnName(ostr);
    }

    if (!original_pattern.empty())
        writeQuotedString(original_pattern, ostr);

    if (parenthesized)
        writeChar(')', ostr);
}
|
|
|
|
|
|
|
|
/// Feeds the strictness flag and the pattern (length first, then contents) into `hash_state`,
/// followed by the base IAST part.
void ASTColumnsExceptTransformer::updateTreeHashImpl(SipHash & hash_state) const
{
    hash_state.update(is_strict);

    const auto & pattern = original_pattern;
    hash_state.update(pattern.size());
    hash_state.update(pattern);

    IAST::updateTreeHashImpl(hash_state);
}
|
|
|
|
|
2020-08-29 05:33:46 +00:00
|
|
|
/// Removes excluded columns from `nodes`.
///  - Without a pattern: every child identifier names a column to drop; an identifier node whose
///    short name is in that set is erased, and the name is then removed from the set.
///  - With a pattern: every identifier node whose short name matches the pattern is erased.
/// In strict mode, names from the explicit list that matched nothing cause an exception
/// with code NO_SUCH_COLUMN_IN_TABLE.
void ASTColumnsExceptTransformer::transform(ASTs & nodes) const
{
    std::set<String> expected_columns;
    if (original_pattern.empty())
    {
        for (const auto & child : children)
            expected_columns.insert(child->as<const ASTIdentifier &>().name());

        for (auto it = nodes.begin(); it != nodes.end();)
        {
            if (const auto * id = it->get()->as<ASTIdentifier>())
            {
                auto expected_column = expected_columns.find(id->shortName());
                if (expected_column != expected_columns.end())
                {
                    expected_columns.erase(expected_column);
                    it = nodes.erase(it);
                    continue;
                }
            }
            ++it;
        }
    }
    else
    {
        for (auto it = nodes.begin(); it != nodes.end();)
        {
            if (const auto * id = it->get()->as<ASTIdentifier>())
            {
                if (isColumnMatching(id->shortName()))
                {
                    it = nodes.erase(it);
                    continue;
                }
            }
            ++it;
        }
    }

    /// Whatever is left in the set was requested explicitly but never seen among the nodes.
    if (is_strict && !expected_columns.empty())
    {
        String expected_columns_str;
        for (const auto & column_name : expected_columns)
        {
            expected_columns_str += ' ';
            expected_columns_str += column_name;
        }

        throw Exception(
            "Columns transformer EXCEPT expects following column(s) :" + expected_columns_str,
            ErrorCodes::NO_SUCH_COLUMN_IN_TABLE);
    }
}
|
|
|
|
|
2021-01-03 16:45:27 +00:00
|
|
|
/// Stores `pattern` and compiles it into an RE2 matcher (quiet mode).
/// Throws CANNOT_COMPILE_REGEXP with RE2's error text when the pattern does not compile.
void ASTColumnsExceptTransformer::setPattern(String pattern)
{
    original_pattern = std::move(pattern);
    column_matcher = std::make_shared<RE2>(original_pattern, RE2::Quiet);

    if (column_matcher->ok())
        return;

    throw DB::Exception(
        "COLUMNS pattern " + original_pattern + " cannot be compiled: " + column_matcher->error(),
        DB::ErrorCodes::CANNOT_COMPILE_REGEXP);
}
|
|
|
|
|
|
|
|
bool ASTColumnsExceptTransformer::isColumnMatching(const String & column_name) const
|
|
|
|
{
|
|
|
|
return RE2::PartialMatch(column_name, *column_matcher);
|
|
|
|
}
|
|
|
|
|
2020-08-29 05:33:46 +00:00
|
|
|
void ASTColumnsReplaceTransformer::Replacement::formatImpl(
|
|
|
|
const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const
|
|
|
|
{
|
|
|
|
expr->formatImpl(settings, state, frame);
|
2021-06-17 03:12:26 +00:00
|
|
|
settings.ostr << (settings.hilite ? hilite_keyword : "") << " AS " << (settings.hilite ? hilite_none : "") << backQuoteIfNeed(name);
|
2020-08-29 05:33:46 +00:00
|
|
|
}
|
|
|
|
|
2022-04-19 17:22:04 +00:00
|
|
|
/// Appends `<expr column name> AS <name>` to `ostr`.
void ASTColumnsReplaceTransformer::Replacement::appendColumnName(WriteBuffer & ostr) const
{
    expr->appendColumnName(ostr);

    const auto & target_name = name;
    writeCString(" AS ", ostr);
    writeProbablyBackQuotedString(target_name, ostr);
}
|
|
|
|
|
|
|
|
/// Feeds the target name (length first, then contents) and the replacement expression
/// into `hash_state`, followed by the base IAST part.
void ASTColumnsReplaceTransformer::Replacement::updateTreeHashImpl(SipHash & hash_state) const
{
    const auto & target_name = name;
    hash_state.update(target_name.size());
    hash_state.update(target_name);

    expr->updateTreeHashImpl(hash_state);

    IAST::updateTreeHashImpl(hash_state);
}
|
|
|
|
|
2020-08-29 05:33:46 +00:00
|
|
|
void ASTColumnsReplaceTransformer::formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const
|
|
|
|
{
|
2020-11-09 09:13:27 +00:00
|
|
|
settings.ostr << (settings.hilite ? hilite_keyword : "") << "REPLACE" << (is_strict ? " STRICT " : " ") << (settings.hilite ? hilite_none : "");
|
2020-10-23 09:15:55 +00:00
|
|
|
|
|
|
|
if (children.size() > 1)
|
2020-11-09 09:13:27 +00:00
|
|
|
settings.ostr << "(";
|
2020-08-29 05:33:46 +00:00
|
|
|
|
|
|
|
for (ASTs::const_iterator it = children.begin(); it != children.end(); ++it)
|
|
|
|
{
|
|
|
|
if (it != children.begin())
|
|
|
|
settings.ostr << ", ";
|
2022-04-19 17:22:04 +00:00
|
|
|
|
2020-08-29 05:33:46 +00:00
|
|
|
(*it)->formatImpl(settings, state, frame);
|
|
|
|
}
|
|
|
|
|
2020-10-23 09:15:55 +00:00
|
|
|
if (children.size() > 1)
|
|
|
|
settings.ostr << ")";
|
2020-08-29 05:33:46 +00:00
|
|
|
}
|
|
|
|
|
2022-04-19 17:22:04 +00:00
|
|
|
/// Appends `REPLACE [STRICT ]` plus the comma-separated replacement names to `ostr`.
/// Parentheses are added only when there is more than one replacement.
void ASTColumnsReplaceTransformer::appendColumnName(WriteBuffer & ostr) const
{
    const bool parenthesized = children.size() > 1;

    writeCString("REPLACE ", ostr);
    if (is_strict)
        writeCString("STRICT ", ostr);

    if (parenthesized)
        writeChar('(', ostr);

    bool is_first = true;
    for (const auto & child : children)
    {
        if (!is_first)
            writeCString(", ", ostr);
        is_first = false;

        child->appendColumnName(ostr);
    }

    if (parenthesized)
        writeChar(')', ostr);
}
|
|
|
|
|
|
|
|
/// Feeds the strictness flag into `hash_state`, followed by the base IAST part.
void ASTColumnsReplaceTransformer::updateTreeHashImpl(SipHash & hash_state) const
{
    const bool strict_flag = is_strict;
    hash_state.update(strict_flag);

    IAST::updateTreeHashImpl(hash_state);
}
|
|
|
|
|
2020-08-29 05:33:46 +00:00
|
|
|
/// Walks the children of `node` recursively and substitutes a clone of `replacement` for every
/// identifier whose short name equals `name`. Identifiers with a different name are left as is
/// and are not descended into; `node` itself is never replaced.
void ASTColumnsReplaceTransformer::replaceChildren(ASTPtr & node, const ASTPtr & replacement, const String & name)
{
    for (auto & child : node->children)
    {
        const auto * identifier = child->as<ASTIdentifier>();
        if (!identifier)
        {
            replaceChildren(child, replacement, name);
            continue;
        }

        if (identifier->shortName() == name)
            child = replacement->clone();
    }
}
|
|
|
|
|
|
|
|
/// Applies the REPLACE list to `nodes`.
///  - An identifier node whose short name has a replacement is substituted by a copy of the
///    replacement expression, aliased with that name.
///  - A node with an alias that has a replacement is substituted by a copy of the replacement
///    expression in which identifiers with that name refer to the original (un-aliased) node.
/// Each replacement is consumed by the first node it matches. Throws ILLEGAL_TYPE_OF_ARGUMENT when
/// the same name is listed twice and, in strict mode, NO_SUCH_COLUMN_IN_TABLE when some
/// replacement matched no node.
void ASTColumnsReplaceTransformer::transform(ASTs & nodes) const
{
    std::map<String, ASTPtr> replace_map;
    for (const auto & replace_child : children)
    {
        auto & replacement = replace_child->as<Replacement &>();

        /// try_emplace reports an already present name without a separate lookup.
        if (!replace_map.try_emplace(replacement.name, replacement.expr).second)
            throw Exception(
                "Expressions in columns transformer REPLACE should not contain the same replacement more than once",
                ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
    }

    for (auto & column : nodes)
    {
        if (const auto * id = column->as<ASTIdentifier>())
        {
            auto replace_it = replace_map.find(id->shortName());
            if (replace_it != replace_map.end())
            {
                /// Clone, so that setAlias() below does not modify the expression owned by this
                /// (const) transformer and the resulting node is not shared with it.
                column = replace_it->second->clone();
                column->setAlias(replace_it->first);
                replace_map.erase(replace_it);
            }
        }
        else if (auto * ast_with_alias = dynamic_cast<ASTWithAlias *>(column.get()))
        {
            auto replace_it = replace_map.find(ast_with_alias->alias);
            if (replace_it != replace_map.end())
            {
                auto new_ast = replace_it->second->clone();
                ast_with_alias->alias = ""; // remove the old alias as it's useless after replace transformation
                replaceChildren(new_ast, column, replace_it->first);
                column = new_ast;
                column->setAlias(replace_it->first);
                replace_map.erase(replace_it);
            }
        }
    }

    /// Entries still in the map were never matched by any node.
    if (is_strict && !replace_map.empty())
    {
        String expected_columns;
        for (const auto & elem : replace_map)
        {
            if (!expected_columns.empty())
                expected_columns += ", ";
            expected_columns += elem.first;
        }

        throw Exception(
            "Columns transformer REPLACE expects following column(s) : " + expected_columns,
            ErrorCodes::NO_SUCH_COLUMN_IN_TABLE);
    }
}
|
|
|
|
|
|
|
|
}
|