Merge pull request #18699 from amosbird/exceptregex

column excepts with regex
This commit is contained in:
alexey-milovidov 2021-01-06 21:28:09 +03:00 committed by GitHub
commit c944035f0b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 74 additions and 16 deletions

View File

@ -6,6 +6,7 @@
#include <Common/SipHash.h>
#include <Common/quoteString.h>
#include <IO/Operators.h>
#include <re2/re2.h>
namespace DB
@ -14,6 +15,7 @@ namespace ErrorCodes
{
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int NO_SUCH_COLUMN_IN_TABLE;
extern const int CANNOT_COMPILE_REGEXP;
}
void IASTColumnsTransformer::transform(const ASTPtr & transformer, ASTs & nodes)
@ -86,6 +88,9 @@ void ASTColumnsExceptTransformer::formatImpl(const FormatSettings & settings, Fo
(*it)->formatImpl(settings, state, frame);
}
if (!original_pattern.empty())
settings.ostr << quoteString(original_pattern);
if (children.size() > 1)
settings.ostr << ")";
}
@ -93,24 +98,40 @@ void ASTColumnsExceptTransformer::formatImpl(const FormatSettings & settings, Fo
void ASTColumnsExceptTransformer::transform(ASTs & nodes) const
{
std::set<String> expected_columns;
for (const auto & child : children)
expected_columns.insert(child->as<const ASTIdentifier &>().name());
for (auto it = nodes.begin(); it != nodes.end();)
if (original_pattern.empty())
{
if (const auto * id = it->get()->as<ASTIdentifier>())
for (const auto & child : children)
expected_columns.insert(child->as<const ASTIdentifier &>().name());
for (auto it = nodes.begin(); it != nodes.end();)
{
auto expected_column = expected_columns.find(id->shortName());
if (expected_column != expected_columns.end())
if (const auto * id = it->get()->as<ASTIdentifier>())
{
expected_columns.erase(expected_column);
it = nodes.erase(it);
auto expected_column = expected_columns.find(id->shortName());
if (expected_column != expected_columns.end())
{
expected_columns.erase(expected_column);
it = nodes.erase(it);
continue;
}
}
else
++it;
}
else
++it;
}
}
else
{
for (auto it = nodes.begin(); it != nodes.end();)
{
if (const auto * id = it->get()->as<ASTIdentifier>())
{
if (isColumnMatching(id->shortName()))
{
it = nodes.erase(it);
continue;
}
}
++it;
}
}
if (is_strict && !expected_columns.empty())
@ -125,6 +146,21 @@ void ASTColumnsExceptTransformer::transform(ASTs & nodes) const
}
}
/// Compiles `pattern` as an RE2 regular expression and stores both the pattern text
/// and the compiled matcher on this transformer.
/// The regex is built with RE2::Quiet; a compilation failure is reported through the exception below.
/// Throws DB::Exception with code CANNOT_COMPILE_REGEXP if the pattern does not compile.
/// In that case the previously stored pattern and matcher are left unchanged.
void ASTColumnsExceptTransformer::setPattern(String pattern)
{
    /// Compile into a local first so that an invalid pattern never replaces valid state.
    auto compiled_matcher = std::make_shared<RE2>(pattern, RE2::Quiet);
    if (!compiled_matcher->ok())
        throw DB::Exception(
            "COLUMNS pattern " + pattern + " cannot be compiled: " + compiled_matcher->error(),
            DB::ErrorCodes::CANNOT_COMPILE_REGEXP);

    original_pattern = std::move(pattern);
    column_matcher = std::move(compiled_matcher);
}
/// Returns true if the compiled regex matches anywhere inside `column_name`
/// (RE2::PartialMatch is an unanchored match).
/// Returns false when no matcher has been set (setPattern() was never called),
/// instead of dereferencing a null pointer.
bool ASTColumnsExceptTransformer::isColumnMatching(const String & column_name) const
{
    return column_matcher && RE2::PartialMatch(column_name, *column_matcher);
}
void ASTColumnsReplaceTransformer::Replacement::formatImpl(
const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const
{

View File

@ -2,6 +2,11 @@
#include <Parsers/IAST.h>
namespace re2
{
class RE2;
}
namespace DB
{
class IASTColumnsTransformer : public IAST
@ -43,9 +48,13 @@ public:
return clone;
}
void transform(ASTs & nodes) const override;
void setPattern(String pattern);
bool isColumnMatching(const String & column_name) const;
protected:
void formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override;
std::shared_ptr<re2::RE2> column_matcher;
String original_pattern;
};
class ASTColumnsReplaceTransformer : public IASTColumnsTransformer

View File

@ -1447,6 +1447,8 @@ bool ParserColumnsTransformers::parseImpl(Pos & pos, ASTPtr & node, Expected & e
is_strict = true;
ASTs identifiers;
ASTPtr regex_node;
ParserStringLiteral regex;
auto parse_id = [&identifiers, &pos, &expected]
{
ASTPtr identifier;
@ -1461,7 +1463,7 @@ bool ParserColumnsTransformers::parseImpl(Pos & pos, ASTPtr & node, Expected & e
{
// support one or more parameters
++pos;
if (!ParserList::parseUtil(pos, expected, parse_id, false))
if (!ParserList::parseUtil(pos, expected, parse_id, false) && !regex.parse(pos, regex_node, expected))
return false;
if (pos->type != TokenType::ClosingRoundBracket)
@ -1471,12 +1473,15 @@ bool ParserColumnsTransformers::parseImpl(Pos & pos, ASTPtr & node, Expected & e
else
{
// only one parameter
if (!parse_id())
if (!parse_id() && !regex.parse(pos, regex_node, expected))
return false;
}
auto res = std::make_shared<ASTColumnsExceptTransformer>();
res->children = std::move(identifiers);
if (regex_node)
res->setPattern(regex_node->as<ASTLiteral &>().value.get<String>());
else
res->children = std::move(identifiers);
res->is_strict = is_strict;
node = std::move(res);
return true;

View File

@ -0,0 +1 @@
100 10 324 120.00 B 8.00 B 23.00 B

View File

@ -0,0 +1,7 @@
-- Test: the EXCEPT column transformer accepts a regular expression given as a string literal.
DROP TABLE IF EXISTS columns_transformers;
CREATE TABLE columns_transformers (i int, j int, k int, a_bytes int, b_bytes int, c_bytes int) Engine=TinyLog;
INSERT INTO columns_transformers VALUES (100, 10, 324, 120, 8, 23);
-- `* EXCEPT 'bytes'` removes every column whose name matches the regex (a_bytes, b_bytes, c_bytes), leaving i, j, k.
-- COLUMNS('bytes') then selects those same three columns and applies formatReadableSize to each of them.
SELECT * EXCEPT 'bytes', COLUMNS('bytes') APPLY formatReadableSize FROM columns_transformers;
DROP TABLE IF EXISTS columns_transformers;