column excepts with regex

This commit is contained in:
Amos Bird 2021-01-04 00:45:27 +08:00
parent 19e0e1a403
commit 3145ca999d
No known key found for this signature in database
GPG Key ID: 80D430DCBECFEDB4
5 changed files with 74 additions and 16 deletions

View File

@ -6,6 +6,7 @@
#include <Common/SipHash.h> #include <Common/SipHash.h>
#include <Common/quoteString.h> #include <Common/quoteString.h>
#include <IO/Operators.h> #include <IO/Operators.h>
#include <re2/re2.h>
namespace DB namespace DB
@ -14,6 +15,7 @@ namespace ErrorCodes
{ {
extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int NO_SUCH_COLUMN_IN_TABLE; extern const int NO_SUCH_COLUMN_IN_TABLE;
extern const int CANNOT_COMPILE_REGEXP;
} }
void IASTColumnsTransformer::transform(const ASTPtr & transformer, ASTs & nodes) void IASTColumnsTransformer::transform(const ASTPtr & transformer, ASTs & nodes)
@ -86,6 +88,9 @@ void ASTColumnsExceptTransformer::formatImpl(const FormatSettings & settings, Fo
(*it)->formatImpl(settings, state, frame); (*it)->formatImpl(settings, state, frame);
} }
if (!original_pattern.empty())
settings.ostr << quoteString(original_pattern);
if (children.size() > 1) if (children.size() > 1)
settings.ostr << ")"; settings.ostr << ")";
} }
@ -93,24 +98,40 @@ void ASTColumnsExceptTransformer::formatImpl(const FormatSettings & settings, Fo
void ASTColumnsExceptTransformer::transform(ASTs & nodes) const void ASTColumnsExceptTransformer::transform(ASTs & nodes) const
{ {
std::set<String> expected_columns; std::set<String> expected_columns;
for (const auto & child : children) if (original_pattern.empty())
expected_columns.insert(child->as<const ASTIdentifier &>().name());
for (auto it = nodes.begin(); it != nodes.end();)
{ {
if (const auto * id = it->get()->as<ASTIdentifier>()) for (const auto & child : children)
expected_columns.insert(child->as<const ASTIdentifier &>().name());
for (auto it = nodes.begin(); it != nodes.end();)
{ {
auto expected_column = expected_columns.find(id->shortName()); if (const auto * id = it->get()->as<ASTIdentifier>())
if (expected_column != expected_columns.end())
{ {
expected_columns.erase(expected_column); auto expected_column = expected_columns.find(id->shortName());
it = nodes.erase(it); if (expected_column != expected_columns.end())
{
expected_columns.erase(expected_column);
it = nodes.erase(it);
continue;
}
} }
else
++it;
}
else
++it; ++it;
}
}
else
{
for (auto it = nodes.begin(); it != nodes.end();)
{
if (const auto * id = it->get()->as<ASTIdentifier>())
{
if (isColumnMatching(id->shortName()))
{
it = nodes.erase(it);
continue;
}
}
++it;
}
} }
if (is_strict && !expected_columns.empty()) if (is_strict && !expected_columns.empty())
@ -125,6 +146,21 @@ void ASTColumnsExceptTransformer::transform(ASTs & nodes) const
} }
} }
/// Compiles `pattern` as an RE2 regular expression and stores both the compiled
/// matcher and the original pattern text on this transformer.
/// Throws DB::Exception with code CANNOT_COMPILE_REGEXP if RE2 rejects the pattern;
/// in that case the previously stored pattern and matcher are left untouched.
void ASTColumnsExceptTransformer::setPattern(String pattern)
{
    /// Compile into a local first: committing the members before validation could leave
    /// a non-empty original_pattern paired with a matcher that failed to compile.
    /// RE2::Quiet is passed so that the failure is reported through the exception below.
    auto matcher = std::make_shared<RE2>(pattern, RE2::Quiet);
    if (!matcher->ok())
        throw DB::Exception(
            "COLUMNS pattern " + pattern + " cannot be compiled: " + matcher->error(),
            DB::ErrorCodes::CANNOT_COMPILE_REGEXP);

    original_pattern = std::move(pattern);
    column_matcher = std::move(matcher);
}
/// Returns true if the compiled pattern matches anywhere inside `column_name`
/// (RE2::PartialMatch is an unanchored search, not a whole-name match).
/// Returns false when no pattern has been compiled, rather than dereferencing a null matcher.
bool ASTColumnsExceptTransformer::isColumnMatching(const String & column_name) const
{
    return column_matcher && RE2::PartialMatch(column_name, *column_matcher);
}
void ASTColumnsReplaceTransformer::Replacement::formatImpl( void ASTColumnsReplaceTransformer::Replacement::formatImpl(
const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const
{ {

View File

@ -2,6 +2,11 @@
#include <Parsers/IAST.h> #include <Parsers/IAST.h>
/// Forward declaration only: lets this header name re2::RE2 (e.g. behind a pointer)
/// without needing the complete type here.
namespace re2 { class RE2; }
namespace DB namespace DB
{ {
class IASTColumnsTransformer : public IAST class IASTColumnsTransformer : public IAST
@ -43,9 +48,13 @@ public:
return clone; return clone;
} }
void transform(ASTs & nodes) const override; void transform(ASTs & nodes) const override;
void setPattern(String pattern);
bool isColumnMatching(const String & column_name) const;
protected: protected:
void formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override; void formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override;
std::shared_ptr<re2::RE2> column_matcher;
String original_pattern;
}; };
class ASTColumnsReplaceTransformer : public IASTColumnsTransformer class ASTColumnsReplaceTransformer : public IASTColumnsTransformer

View File

@ -1427,6 +1427,8 @@ bool ParserColumnsTransformers::parseImpl(Pos & pos, ASTPtr & node, Expected & e
is_strict = true; is_strict = true;
ASTs identifiers; ASTs identifiers;
ASTPtr regex_node;
ParserStringLiteral regex;
auto parse_id = [&identifiers, &pos, &expected] auto parse_id = [&identifiers, &pos, &expected]
{ {
ASTPtr identifier; ASTPtr identifier;
@ -1441,7 +1443,7 @@ bool ParserColumnsTransformers::parseImpl(Pos & pos, ASTPtr & node, Expected & e
{ {
// support one or more parameter // support one or more parameter
++pos; ++pos;
if (!ParserList::parseUtil(pos, expected, parse_id, false)) if (!ParserList::parseUtil(pos, expected, parse_id, false) && !regex.parse(pos, regex_node, expected))
return false; return false;
if (pos->type != TokenType::ClosingRoundBracket) if (pos->type != TokenType::ClosingRoundBracket)
@ -1451,12 +1453,15 @@ bool ParserColumnsTransformers::parseImpl(Pos & pos, ASTPtr & node, Expected & e
else else
{ {
// only one parameter // only one parameter
if (!parse_id()) if (!parse_id() && !regex.parse(pos, regex_node, expected))
return false; return false;
} }
auto res = std::make_shared<ASTColumnsExceptTransformer>(); auto res = std::make_shared<ASTColumnsExceptTransformer>();
res->children = std::move(identifiers); if (regex_node)
res->setPattern(regex_node->as<ASTLiteral &>().value.get<String>());
else
res->children = std::move(identifiers);
res->is_strict = is_strict; res->is_strict = is_strict;
node = std::move(res); node = std::move(res);
return true; return true;

View File

@ -0,0 +1 @@
100 10 324 120.00 B 8.00 B 23.00 B

View File

@ -0,0 +1,7 @@
-- Regression test: the EXCEPT column transformer accepts a regex string literal.
DROP TABLE IF EXISTS columns_transformers;
-- Three plain columns (i, j, k) plus three whose names contain "bytes".
CREATE TABLE columns_transformers (i int, j int, k int, a_bytes int, b_bytes int, c_bytes int) Engine=TinyLog;
INSERT INTO columns_transformers VALUES (100, 10, 324, 120, 8, 23);
-- `* EXCEPT 'bytes'` drops every column whose name matches the regex 'bytes', leaving i, j, k;
-- COLUMNS('bytes') then selects the *_bytes columns and applies formatReadableSize to each.
-- Expected single row: 100 10 324 120.00 B 8.00 B 23.00 B
SELECT * EXCEPT 'bytes', COLUMNS('bytes') APPLY formatReadableSize FROM columns_transformers;
-- Clean up so the test leaves no table behind.
DROP TABLE IF EXISTS columns_transformers;