mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-09-20 08:40:50 +00:00
Merge pull request #61543 from ClickHouse/fix-regexp-recompilation-in-parser
Fix recompilation of regexps during backtracking in the parser
This commit is contained in:
commit
8670f1dc91
@ -1,18 +1,18 @@
|
||||
#include <Analyzer/ColumnTransformers.h>
|
||||
|
||||
#include <Common/SipHash.h>
|
||||
#include <Common/re2.h>
|
||||
|
||||
#include <IO/WriteBuffer.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <IO/Operators.h>
|
||||
|
||||
#include <Parsers/ASTIdentifier.h>
|
||||
#include <Parsers/ASTAsterisk.h>
|
||||
#include <Parsers/ASTColumnsTransformers.h>
|
||||
|
||||
#include <Analyzer/FunctionNode.h>
|
||||
#include <Analyzer/LambdaNode.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
@ -133,7 +133,7 @@ ExceptColumnTransformerNode::ExceptColumnTransformerNode(std::shared_ptr<re2::RE
|
||||
bool ExceptColumnTransformerNode::isColumnMatching(const std::string & column_name) const
|
||||
{
|
||||
if (column_matcher)
|
||||
return RE2::PartialMatch(column_name, *column_matcher);
|
||||
return re2::RE2::PartialMatch(column_name, *column_matcher);
|
||||
|
||||
for (const auto & name : except_column_names)
|
||||
if (column_name == name)
|
||||
|
@ -3,7 +3,12 @@
|
||||
#include <Analyzer/Identifier.h>
|
||||
#include <Analyzer/IQueryTreeNode.h>
|
||||
#include <Analyzer/ListNode.h>
|
||||
#include <Common/re2.h>
|
||||
|
||||
|
||||
namespace re2
|
||||
{
|
||||
class RE2;
|
||||
}
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -102,8 +107,6 @@ enum class ApplyColumnTransformerType
|
||||
/// Get apply column transformer type name
|
||||
const char * toString(ApplyColumnTransformerType type);
|
||||
|
||||
class ApplyColumnTransformerNode;
|
||||
using ApplyColumnTransformerNodePtr = std::shared_ptr<ApplyColumnTransformerNode>;
|
||||
|
||||
/// Apply column transformer
|
||||
class ApplyColumnTransformerNode final : public IColumnTransformerNode
|
||||
@ -158,8 +161,6 @@ enum class ExceptColumnTransformerType
|
||||
|
||||
const char * toString(ExceptColumnTransformerType type);
|
||||
|
||||
class ExceptColumnTransformerNode;
|
||||
using ExceptColumnTransformerNodePtr = std::shared_ptr<ExceptColumnTransformerNode>;
|
||||
|
||||
/** Except column transformer.
|
||||
* Strict EXCEPT column transformer must use all column names during matched nodes transformation.
|
||||
@ -230,8 +231,6 @@ private:
|
||||
static constexpr size_t children_size = 0;
|
||||
};
|
||||
|
||||
class ReplaceColumnTransformerNode;
|
||||
using ReplaceColumnTransformerNodePtr = std::shared_ptr<ReplaceColumnTransformerNode>;
|
||||
|
||||
/** Replace column transformer.
|
||||
* Strict replace column transformer must use all replacements during matched nodes transformation.
|
||||
|
@ -4,7 +4,6 @@
|
||||
#include <Common/SipHash.h>
|
||||
|
||||
#include <IO/WriteBuffer.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <IO/Operators.h>
|
||||
|
||||
#include <Parsers/ASTIdentifier.h>
|
||||
@ -14,9 +13,15 @@
|
||||
#include <Parsers/ASTExpressionList.h>
|
||||
#include <Parsers/ASTColumnsTransformers.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int CANNOT_COMPILE_REGEXP;
|
||||
}
|
||||
|
||||
const char * toString(MatcherNodeType matcher_node_type)
|
||||
{
|
||||
switch (matcher_node_type)
|
||||
@ -48,20 +53,20 @@ MatcherNode::MatcherNode(Identifier qualified_identifier_, ColumnTransformersNod
|
||||
{
|
||||
}
|
||||
|
||||
MatcherNode::MatcherNode(std::shared_ptr<re2::RE2> columns_matcher_, ColumnTransformersNodes column_transformers_)
|
||||
MatcherNode::MatcherNode(String pattern_, ColumnTransformersNodes column_transformers_)
|
||||
: MatcherNode(MatcherNodeType::COLUMNS_REGEXP,
|
||||
{} /*qualified_identifier*/,
|
||||
{} /*columns_identifiers*/,
|
||||
std::move(columns_matcher_),
|
||||
std::move(pattern_),
|
||||
std::move(column_transformers_))
|
||||
{
|
||||
}
|
||||
|
||||
MatcherNode::MatcherNode(Identifier qualified_identifier_, std::shared_ptr<re2::RE2> columns_matcher_, ColumnTransformersNodes column_transformers_)
|
||||
MatcherNode::MatcherNode(Identifier qualified_identifier_, String pattern_, ColumnTransformersNodes column_transformers_)
|
||||
: MatcherNode(MatcherNodeType::COLUMNS_REGEXP,
|
||||
std::move(qualified_identifier_),
|
||||
{} /*columns_identifiers*/,
|
||||
std::move(columns_matcher_),
|
||||
std::move(pattern_),
|
||||
std::move(column_transformers_))
|
||||
{
|
||||
}
|
||||
@ -87,14 +92,21 @@ MatcherNode::MatcherNode(Identifier qualified_identifier_, Identifiers columns_i
|
||||
MatcherNode::MatcherNode(MatcherNodeType matcher_type_,
|
||||
Identifier qualified_identifier_,
|
||||
Identifiers columns_identifiers_,
|
||||
std::shared_ptr<re2::RE2> columns_matcher_,
|
||||
std::optional<String> pattern_,
|
||||
ColumnTransformersNodes column_transformers_)
|
||||
: IQueryTreeNode(children_size)
|
||||
, matcher_type(matcher_type_)
|
||||
, qualified_identifier(qualified_identifier_)
|
||||
, columns_identifiers(columns_identifiers_)
|
||||
, columns_matcher(columns_matcher_)
|
||||
{
|
||||
if (pattern_)
|
||||
{
|
||||
columns_matcher = std::make_shared<re2::RE2>(*pattern_, re2::RE2::Quiet);
|
||||
if (!columns_matcher->ok())
|
||||
throw DB::Exception(ErrorCodes::CANNOT_COMPILE_REGEXP,
|
||||
"COLUMNS pattern {} cannot be compiled: {}", *pattern_, columns_matcher->error());
|
||||
}
|
||||
|
||||
auto column_transformers_list_node = std::make_shared<ListNode>();
|
||||
|
||||
auto & column_transformers_nodes = column_transformers_list_node->getNodes();
|
||||
|
@ -6,6 +6,7 @@
|
||||
#include <Parsers/ASTAsterisk.h>
|
||||
#include <Common/re2.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
@ -58,10 +59,10 @@ public:
|
||||
explicit MatcherNode(Identifier qualified_identifier_, ColumnTransformersNodes column_transformers_ = {});
|
||||
|
||||
/// Variant unqualified COLUMNS('regexp')
|
||||
explicit MatcherNode(std::shared_ptr<re2::RE2> columns_matcher_, ColumnTransformersNodes column_transformers_ = {});
|
||||
explicit MatcherNode(String pattern_, ColumnTransformersNodes column_transformers_ = {});
|
||||
|
||||
/// Variant qualified COLUMNS('regexp')
|
||||
explicit MatcherNode(Identifier qualified_identifier_, std::shared_ptr<re2::RE2> columns_matcher_, ColumnTransformersNodes column_transformers_ = {});
|
||||
explicit MatcherNode(Identifier qualified_identifier_, String pattern_, ColumnTransformersNodes column_transformers_ = {});
|
||||
|
||||
/// Variant unqualified COLUMNS(column_name_1, ...)
|
||||
explicit MatcherNode(Identifiers columns_identifiers_, ColumnTransformersNodes column_transformers_ = {});
|
||||
@ -81,12 +82,6 @@ public:
|
||||
return matcher_type == MatcherNodeType::ASTERISK;
|
||||
}
|
||||
|
||||
/// Returns true if matcher is columns regexp or columns list matcher, false otherwise
|
||||
bool isColumnsMatcher() const
|
||||
{
|
||||
return matcher_type == MatcherNodeType::COLUMNS_REGEXP || matcher_type == MatcherNodeType::COLUMNS_LIST;
|
||||
}
|
||||
|
||||
/// Returns true if matcher is qualified, false otherwise
|
||||
bool isQualified() const
|
||||
{
|
||||
@ -152,7 +147,7 @@ private:
|
||||
explicit MatcherNode(MatcherNodeType matcher_type_,
|
||||
Identifier qualified_identifier_,
|
||||
Identifiers columns_identifiers_,
|
||||
std::shared_ptr<re2::RE2> columns_matcher_,
|
||||
std::optional<String> pattern_,
|
||||
ColumnTransformersNodes column_transformers_);
|
||||
|
||||
MatcherNodeType matcher_type;
|
||||
|
@ -2,11 +2,7 @@
|
||||
|
||||
#include <Common/FieldVisitorToString.h>
|
||||
|
||||
#include <DataTypes/IDataType.h>
|
||||
#include <DataTypes/DataTypeTuple.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <DataTypes/FieldToDataType.h>
|
||||
#include <Parsers/ParserSelectQuery.h>
|
||||
#include <Parsers/ParserSelectWithUnionQuery.h>
|
||||
#include <Parsers/ASTSelectWithUnionQuery.h>
|
||||
#include <Parsers/ASTSelectIntersectExceptQuery.h>
|
||||
@ -33,13 +29,11 @@
|
||||
#include <Analyzer/MatcherNode.h>
|
||||
#include <Analyzer/ColumnTransformers.h>
|
||||
#include <Analyzer/ConstantNode.h>
|
||||
#include <Analyzer/ColumnNode.h>
|
||||
#include <Analyzer/FunctionNode.h>
|
||||
#include <Analyzer/LambdaNode.h>
|
||||
#include <Analyzer/SortNode.h>
|
||||
#include <Analyzer/InterpolateNode.h>
|
||||
#include <Analyzer/WindowNode.h>
|
||||
#include <Analyzer/TableNode.h>
|
||||
#include <Analyzer/TableFunctionNode.h>
|
||||
#include <Analyzer/QueryNode.h>
|
||||
#include <Analyzer/ArrayJoinNode.h>
|
||||
@ -50,7 +44,6 @@
|
||||
|
||||
#include <Interpreters/StorageID.h>
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Functions/FunctionFactory.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -661,7 +654,7 @@ QueryTreeNodePtr QueryTreeBuilder::buildExpression(const ASTPtr & expression, co
|
||||
else if (const auto * columns_regexp_matcher = expression->as<ASTColumnsRegexpMatcher>())
|
||||
{
|
||||
auto column_transformers = buildColumnTransformers(columns_regexp_matcher->transformers, context);
|
||||
result = std::make_shared<MatcherNode>(columns_regexp_matcher->getMatcher(), std::move(column_transformers));
|
||||
result = std::make_shared<MatcherNode>(columns_regexp_matcher->getPattern(), std::move(column_transformers));
|
||||
}
|
||||
else if (const auto * columns_list_matcher = expression->as<ASTColumnsListMatcher>())
|
||||
{
|
||||
@ -681,7 +674,7 @@ QueryTreeNodePtr QueryTreeBuilder::buildExpression(const ASTPtr & expression, co
|
||||
{
|
||||
auto & qualified_identifier = qualified_columns_regexp_matcher->qualifier->as<ASTIdentifier &>();
|
||||
auto column_transformers = buildColumnTransformers(qualified_columns_regexp_matcher->transformers, context);
|
||||
result = std::make_shared<MatcherNode>(Identifier(qualified_identifier.name_parts), qualified_columns_regexp_matcher->getMatcher(), std::move(column_transformers));
|
||||
result = std::make_shared<MatcherNode>(Identifier(qualified_identifier.name_parts), qualified_columns_regexp_matcher->getPattern(), std::move(column_transformers));
|
||||
}
|
||||
else if (const auto * qualified_columns_list_matcher = expression->as<ASTQualifiedColumnsListMatcher>())
|
||||
{
|
||||
|
@ -60,7 +60,7 @@ public:
|
||||
, replacement(replacement_string)
|
||||
{
|
||||
if (!regexp.ok())
|
||||
throw DB::Exception(DB::ErrorCodes::CANNOT_COMPILE_REGEXP,
|
||||
throw Exception(ErrorCodes::CANNOT_COMPILE_REGEXP,
|
||||
"SensitiveDataMasker: cannot compile re2: {}, error: {}. "
|
||||
"Look at https://github.com/google/re2/wiki/Syntax for reference.",
|
||||
regexp_string_, regexp.error());
|
||||
|
@ -15,11 +15,10 @@
|
||||
#include <Parsers/ASTColumnsTransformers.h>
|
||||
#include <Parsers/ASTQualifiedAsterisk.h>
|
||||
#include <Parsers/ParserTablesInSelectQuery.h>
|
||||
#include <Parsers/ExpressionListParsers.h>
|
||||
#include <Parsers/parseQuery.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <Core/Defines.h>
|
||||
#include <Common/StringUtils/StringUtils.h>
|
||||
#include <Common/re2.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -30,6 +29,7 @@ namespace ErrorCodes
|
||||
extern const int AMBIGUOUS_COLUMN_NAME;
|
||||
extern const int NOT_IMPLEMENTED;
|
||||
extern const int UNKNOWN_IDENTIFIER;
|
||||
extern const int CANNOT_COMPILE_REGEXP;
|
||||
}
|
||||
|
||||
namespace
|
||||
@ -204,11 +204,17 @@ private:
|
||||
{
|
||||
has_asterisks = true;
|
||||
|
||||
String pattern = columns_regexp_matcher->getPattern();
|
||||
re2::RE2 regexp(pattern, re2::RE2::Quiet);
|
||||
if (!regexp.ok())
|
||||
throw Exception(ErrorCodes::CANNOT_COMPILE_REGEXP,
|
||||
"COLUMNS pattern {} cannot be compiled: {}", pattern, regexp.error());
|
||||
|
||||
for (auto & table_name : data.tables_order)
|
||||
data.addTableColumns(
|
||||
table_name,
|
||||
columns,
|
||||
[&](const String & column_name) { return columns_regexp_matcher->isColumnMatching(column_name); });
|
||||
[&](const String & column_name) { return re2::RE2::PartialMatch(column_name, regexp); });
|
||||
|
||||
if (columns_regexp_matcher->transformers)
|
||||
{
|
||||
|
@ -1,4 +1,3 @@
|
||||
#include <cstring>
|
||||
#include <Poco/String.h>
|
||||
|
||||
#include <Interpreters/TranslateQualifiedNamesVisitor.h>
|
||||
@ -6,7 +5,6 @@
|
||||
|
||||
#include <Common/typeid_cast.h>
|
||||
#include <Common/StringUtils/StringUtils.h>
|
||||
#include <Core/Names.h>
|
||||
#include <DataTypes/DataTypeTuple.h>
|
||||
|
||||
#include <Parsers/ASTIdentifier.h>
|
||||
@ -21,6 +19,7 @@
|
||||
#include <Parsers/ASTColumnsMatcher.h>
|
||||
#include <Parsers/ASTColumnsTransformers.h>
|
||||
#include <Storages/StorageView.h>
|
||||
#include <Common/re2.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -31,7 +30,10 @@ namespace ErrorCodes
|
||||
extern const int UNKNOWN_IDENTIFIER;
|
||||
extern const int UNSUPPORTED_JOIN_KEYS;
|
||||
extern const int LOGICAL_ERROR;
|
||||
extern const int CANNOT_COMPILE_REGEXP;
|
||||
}
|
||||
|
||||
|
||||
bool TranslateQualifiedNamesMatcher::Data::matchColumnName(std::string_view name, const String & column_name, DataTypePtr column_type)
|
||||
{
|
||||
if (name.size() < column_name.size())
|
||||
@ -275,12 +277,19 @@ void TranslateQualifiedNamesMatcher::visit(ASTExpressionList & node, const ASTPt
|
||||
}
|
||||
else if (const auto * asterisk_regexp_pattern = child->as<ASTColumnsRegexpMatcher>())
|
||||
{
|
||||
String pattern = asterisk_regexp_pattern->getPattern();
|
||||
re2::RE2 regexp(pattern, re2::RE2::Quiet);
|
||||
if (!regexp.ok())
|
||||
throw Exception(ErrorCodes::CANNOT_COMPILE_REGEXP,
|
||||
"COLUMNS pattern {} cannot be compiled: {}", pattern, regexp.error());
|
||||
|
||||
bool first_table = true;
|
||||
for (const auto & table : tables_with_columns)
|
||||
{
|
||||
for (const auto & column : table.columns)
|
||||
{
|
||||
if (asterisk_regexp_pattern->isColumnMatching(column.name) && (first_table || !data.join_using_columns.contains(column.name)))
|
||||
if (re2::RE2::PartialMatch(column.name, regexp)
|
||||
&& (first_table || !data.join_using_columns.contains(column.name)))
|
||||
{
|
||||
addIdentifier(columns, table.table, column.name);
|
||||
}
|
||||
|
@ -4,17 +4,11 @@
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <Common/SipHash.h>
|
||||
#include <Common/quoteString.h>
|
||||
#include <Common/re2.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int CANNOT_COMPILE_REGEXP;
|
||||
}
|
||||
|
||||
ASTPtr ASTColumnsRegexpMatcher::clone() const
|
||||
{
|
||||
auto clone = std::make_shared<ASTColumnsRegexpMatcher>(*this);
|
||||
@ -34,14 +28,14 @@ void ASTColumnsRegexpMatcher::appendColumnName(WriteBuffer & ostr) const
|
||||
writeCString(".", ostr);
|
||||
}
|
||||
writeCString("COLUMNS(", ostr);
|
||||
writeQuotedString(original_pattern, ostr);
|
||||
writeQuotedString(pattern, ostr);
|
||||
writeChar(')', ostr);
|
||||
}
|
||||
|
||||
void ASTColumnsRegexpMatcher::updateTreeHashImpl(SipHash & hash_state, bool ignore_aliases) const
|
||||
{
|
||||
hash_state.update(original_pattern.size());
|
||||
hash_state.update(original_pattern);
|
||||
hash_state.update(pattern.size());
|
||||
hash_state.update(pattern);
|
||||
IAST::updateTreeHashImpl(hash_state, ignore_aliases);
|
||||
}
|
||||
|
||||
@ -56,7 +50,7 @@ void ASTColumnsRegexpMatcher::formatImpl(const FormatSettings & settings, Format
|
||||
}
|
||||
|
||||
settings.ostr << "COLUMNS" << (settings.hilite ? hilite_none : "") << "(";
|
||||
settings.ostr << quoteString(original_pattern);
|
||||
settings.ostr << quoteString(pattern);
|
||||
settings.ostr << ")";
|
||||
|
||||
if (transformers)
|
||||
@ -65,28 +59,14 @@ void ASTColumnsRegexpMatcher::formatImpl(const FormatSettings & settings, Format
|
||||
}
|
||||
}
|
||||
|
||||
void ASTColumnsRegexpMatcher::setPattern(String pattern)
|
||||
void ASTColumnsRegexpMatcher::setPattern(String pattern_)
|
||||
{
|
||||
original_pattern = std::move(pattern);
|
||||
column_matcher = std::make_shared<RE2>(original_pattern, RE2::Quiet);
|
||||
if (!column_matcher->ok())
|
||||
throw DB::Exception(DB::ErrorCodes::CANNOT_COMPILE_REGEXP,
|
||||
"COLUMNS pattern {} cannot be compiled: {}", original_pattern, column_matcher->error());
|
||||
pattern = std::move(pattern_);
|
||||
}
|
||||
|
||||
const String & ASTColumnsRegexpMatcher::getPattern() const
|
||||
{
|
||||
return original_pattern;
|
||||
}
|
||||
|
||||
const std::shared_ptr<re2::RE2> & ASTColumnsRegexpMatcher::getMatcher() const
|
||||
{
|
||||
return column_matcher;
|
||||
}
|
||||
|
||||
bool ASTColumnsRegexpMatcher::isColumnMatching(const String & column_name) const
|
||||
{
|
||||
return RE2::PartialMatch(column_name, *column_matcher);
|
||||
return pattern;
|
||||
}
|
||||
|
||||
ASTPtr ASTColumnsListMatcher::clone() const
|
||||
@ -166,37 +146,24 @@ void ASTQualifiedColumnsRegexpMatcher::appendColumnName(WriteBuffer & ostr) cons
|
||||
{
|
||||
qualifier->appendColumnName(ostr);
|
||||
writeCString(".COLUMNS(", ostr);
|
||||
writeQuotedString(original_pattern, ostr);
|
||||
writeQuotedString(pattern, ostr);
|
||||
writeChar(')', ostr);
|
||||
}
|
||||
|
||||
void ASTQualifiedColumnsRegexpMatcher::setPattern(String pattern, bool set_matcher)
|
||||
void ASTQualifiedColumnsRegexpMatcher::setPattern(String pattern_)
|
||||
{
|
||||
original_pattern = std::move(pattern);
|
||||
|
||||
if (!set_matcher)
|
||||
return;
|
||||
|
||||
column_matcher = std::make_shared<RE2>(original_pattern, RE2::Quiet);
|
||||
if (!column_matcher->ok())
|
||||
throw DB::Exception(DB::ErrorCodes::CANNOT_COMPILE_REGEXP,
|
||||
"COLUMNS pattern {} cannot be compiled: {}", original_pattern, column_matcher->error());
|
||||
pattern = std::move(pattern_);
|
||||
}
|
||||
|
||||
void ASTQualifiedColumnsRegexpMatcher::setMatcher(std::shared_ptr<re2::RE2> matcher)
|
||||
const String & ASTQualifiedColumnsRegexpMatcher::getPattern() const
|
||||
{
|
||||
column_matcher = std::move(matcher);
|
||||
}
|
||||
|
||||
const std::shared_ptr<re2::RE2> & ASTQualifiedColumnsRegexpMatcher::getMatcher() const
|
||||
{
|
||||
return column_matcher;
|
||||
return pattern;
|
||||
}
|
||||
|
||||
void ASTQualifiedColumnsRegexpMatcher::updateTreeHashImpl(SipHash & hash_state, bool ignore_aliases) const
|
||||
{
|
||||
hash_state.update(original_pattern.size());
|
||||
hash_state.update(original_pattern);
|
||||
hash_state.update(pattern.size());
|
||||
hash_state.update(pattern);
|
||||
IAST::updateTreeHashImpl(hash_state, ignore_aliases);
|
||||
}
|
||||
|
||||
@ -207,7 +174,7 @@ void ASTQualifiedColumnsRegexpMatcher::formatImpl(const FormatSettings & setting
|
||||
qualifier->formatImpl(settings, state, frame);
|
||||
|
||||
settings.ostr << ".COLUMNS" << (settings.hilite ? hilite_none : "") << "(";
|
||||
settings.ostr << quoteString(original_pattern);
|
||||
settings.ostr << quoteString(pattern);
|
||||
settings.ostr << ")";
|
||||
|
||||
if (transformers)
|
||||
|
@ -2,11 +2,6 @@
|
||||
|
||||
#include <Parsers/IAST.h>
|
||||
|
||||
namespace re2
|
||||
{
|
||||
class RE2;
|
||||
}
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -25,8 +20,6 @@ public:
|
||||
void appendColumnName(WriteBuffer & ostr) const override;
|
||||
void setPattern(String pattern);
|
||||
const String & getPattern() const;
|
||||
const std::shared_ptr<re2::RE2> & getMatcher() const;
|
||||
bool isColumnMatching(const String & column_name) const;
|
||||
void updateTreeHashImpl(SipHash & hash_state, bool ignore_aliases) const override;
|
||||
|
||||
ASTPtr expression;
|
||||
@ -35,8 +28,7 @@ protected:
|
||||
void formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override;
|
||||
|
||||
private:
|
||||
std::shared_ptr<re2::RE2> column_matcher;
|
||||
String original_pattern;
|
||||
String pattern;
|
||||
};
|
||||
|
||||
/// Same as the above but use a list of column names to do matching.
|
||||
@ -62,9 +54,8 @@ public:
|
||||
ASTPtr clone() const override;
|
||||
|
||||
void appendColumnName(WriteBuffer & ostr) const override;
|
||||
const std::shared_ptr<re2::RE2> & getMatcher() const;
|
||||
void setPattern(String pattern, bool set_matcher = true);
|
||||
void setMatcher(std::shared_ptr<re2::RE2> matcher);
|
||||
void setPattern(String pattern_);
|
||||
const String & getPattern() const;
|
||||
void updateTreeHashImpl(SipHash & hash_state, bool ignore_aliases) const override;
|
||||
|
||||
ASTPtr qualifier;
|
||||
@ -73,8 +64,7 @@ protected:
|
||||
void formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override;
|
||||
|
||||
private:
|
||||
std::shared_ptr<re2::RE2> column_matcher;
|
||||
String original_pattern;
|
||||
String pattern;
|
||||
};
|
||||
|
||||
/// Same as ASTColumnsListMatcher. Qualified identifier is first child.
|
||||
|
@ -12,6 +12,7 @@
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
@ -179,8 +180,8 @@ void ASTColumnsExceptTransformer::formatImpl(const FormatSettings & settings, Fo
|
||||
(*it)->formatImpl(settings, state, frame);
|
||||
}
|
||||
|
||||
if (!original_pattern.empty())
|
||||
settings.ostr << quoteString(original_pattern);
|
||||
if (pattern)
|
||||
settings.ostr << quoteString(*pattern);
|
||||
|
||||
if (children.size() > 1)
|
||||
settings.ostr << ")";
|
||||
@ -202,8 +203,8 @@ void ASTColumnsExceptTransformer::appendColumnName(WriteBuffer & ostr) const
|
||||
(*it)->appendColumnName(ostr);
|
||||
}
|
||||
|
||||
if (!original_pattern.empty())
|
||||
writeQuotedString(original_pattern, ostr);
|
||||
if (pattern)
|
||||
writeQuotedString(*pattern, ostr);
|
||||
|
||||
if (children.size() > 1)
|
||||
writeChar(')', ostr);
|
||||
@ -212,8 +213,11 @@ void ASTColumnsExceptTransformer::appendColumnName(WriteBuffer & ostr) const
|
||||
void ASTColumnsExceptTransformer::updateTreeHashImpl(SipHash & hash_state, bool ignore_aliases) const
|
||||
{
|
||||
hash_state.update(is_strict);
|
||||
hash_state.update(original_pattern.size());
|
||||
hash_state.update(original_pattern);
|
||||
if (pattern)
|
||||
{
|
||||
hash_state.update(pattern->size());
|
||||
hash_state.update(*pattern);
|
||||
}
|
||||
|
||||
IAST::updateTreeHashImpl(hash_state, ignore_aliases);
|
||||
}
|
||||
@ -221,7 +225,7 @@ void ASTColumnsExceptTransformer::updateTreeHashImpl(SipHash & hash_state, bool
|
||||
void ASTColumnsExceptTransformer::transform(ASTs & nodes) const
|
||||
{
|
||||
std::set<String> expected_columns;
|
||||
if (original_pattern.empty())
|
||||
if (!pattern)
|
||||
{
|
||||
for (const auto & child : children)
|
||||
expected_columns.insert(child->as<const ASTIdentifier &>().name());
|
||||
@ -243,11 +247,13 @@ void ASTColumnsExceptTransformer::transform(ASTs & nodes) const
|
||||
}
|
||||
else
|
||||
{
|
||||
auto regexp = getMatcher();
|
||||
|
||||
for (auto * it = nodes.begin(); it != nodes.end();)
|
||||
{
|
||||
if (const auto * id = it->get()->as<ASTIdentifier>())
|
||||
{
|
||||
if (isColumnMatching(id->shortName()))
|
||||
if (RE2::PartialMatch(id->shortName(), *regexp))
|
||||
{
|
||||
it = nodes.erase(it);
|
||||
continue;
|
||||
@ -268,23 +274,21 @@ void ASTColumnsExceptTransformer::transform(ASTs & nodes) const
|
||||
}
|
||||
}
|
||||
|
||||
void ASTColumnsExceptTransformer::setPattern(String pattern)
|
||||
void ASTColumnsExceptTransformer::setPattern(String pattern_)
|
||||
{
|
||||
original_pattern = std::move(pattern);
|
||||
column_matcher = std::make_shared<RE2>(original_pattern, RE2::Quiet);
|
||||
if (!column_matcher->ok())
|
||||
throw DB::Exception(DB::ErrorCodes::CANNOT_COMPILE_REGEXP, "COLUMNS pattern {} cannot be compiled: {}",
|
||||
original_pattern, column_matcher->error());
|
||||
pattern = std::move(pattern_);
|
||||
}
|
||||
|
||||
const std::shared_ptr<re2::RE2> & ASTColumnsExceptTransformer::getMatcher() const
|
||||
std::shared_ptr<re2::RE2> ASTColumnsExceptTransformer::getMatcher() const
|
||||
{
|
||||
return column_matcher;
|
||||
}
|
||||
if (!pattern)
|
||||
return {};
|
||||
|
||||
bool ASTColumnsExceptTransformer::isColumnMatching(const String & column_name) const
|
||||
{
|
||||
return RE2::PartialMatch(column_name, *column_matcher);
|
||||
auto regexp = std::make_shared<re2::RE2>(*pattern, re2::RE2::Quiet);
|
||||
if (!regexp->ok())
|
||||
throw Exception(ErrorCodes::CANNOT_COMPILE_REGEXP,
|
||||
"COLUMNS pattern {} cannot be compiled: {}", *pattern, regexp->error());
|
||||
return regexp;
|
||||
}
|
||||
|
||||
void ASTColumnsReplaceTransformer::Replacement::formatImpl(
|
||||
|
@ -2,6 +2,7 @@
|
||||
|
||||
#include <Parsers/IAST.h>
|
||||
|
||||
|
||||
namespace re2
|
||||
{
|
||||
class RE2;
|
||||
@ -76,16 +77,14 @@ public:
|
||||
return clone;
|
||||
}
|
||||
void transform(ASTs & nodes) const override;
|
||||
void setPattern(String pattern);
|
||||
const std::shared_ptr<re2::RE2> & getMatcher() const;
|
||||
bool isColumnMatching(const String & column_name) const;
|
||||
void setPattern(String pattern_);
|
||||
std::shared_ptr<re2::RE2> getMatcher() const;
|
||||
void appendColumnName(WriteBuffer & ostr) const override;
|
||||
void updateTreeHashImpl(SipHash & hash_state, bool ignore_aliases) const override;
|
||||
|
||||
protected:
|
||||
void formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override;
|
||||
std::shared_ptr<re2::RE2> column_matcher;
|
||||
String original_pattern;
|
||||
std::optional<String> pattern;
|
||||
};
|
||||
|
||||
class ASTColumnsReplaceTransformer : public IASTColumnsTransformer
|
||||
|
@ -36,7 +36,6 @@
|
||||
#include <Parsers/ExpressionListParsers.h>
|
||||
#include <Parsers/IAST_fwd.h>
|
||||
#include <Parsers/ParserSelectWithUnionQuery.h>
|
||||
#include <Parsers/ParserCase.h>
|
||||
#include <Parsers/ExpressionElementParsers.h>
|
||||
#include <Parsers/ParserCreateQuery.h>
|
||||
#include <Parsers/ParserExplainQuery.h>
|
||||
@ -44,6 +43,7 @@
|
||||
|
||||
#include <Interpreters/StorageID.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
@ -1628,7 +1628,7 @@ bool ParserColumnsTransformers::parseImpl(Pos & pos, ASTPtr & node, Expected & e
|
||||
is_strict = true;
|
||||
|
||||
ASTs identifiers;
|
||||
ASTPtr regex_node;
|
||||
ASTPtr regexp_node;
|
||||
ParserStringLiteral regex;
|
||||
auto parse_id = [&identifiers, &pos, &expected]
|
||||
{
|
||||
@ -1644,7 +1644,7 @@ bool ParserColumnsTransformers::parseImpl(Pos & pos, ASTPtr & node, Expected & e
|
||||
{
|
||||
// support one or more parameter
|
||||
++pos;
|
||||
if (!ParserList::parseUtil(pos, expected, parse_id, false) && !regex.parse(pos, regex_node, expected))
|
||||
if (!ParserList::parseUtil(pos, expected, parse_id, false) && !regex.parse(pos, regexp_node, expected))
|
||||
return false;
|
||||
|
||||
if (pos->type != TokenType::ClosingRoundBracket)
|
||||
@ -1654,13 +1654,13 @@ bool ParserColumnsTransformers::parseImpl(Pos & pos, ASTPtr & node, Expected & e
|
||||
else
|
||||
{
|
||||
// only one parameter
|
||||
if (!parse_id() && !regex.parse(pos, regex_node, expected))
|
||||
if (!parse_id() && !regex.parse(pos, regexp_node, expected))
|
||||
return false;
|
||||
}
|
||||
|
||||
auto res = std::make_shared<ASTColumnsExceptTransformer>();
|
||||
if (regex_node)
|
||||
res->setPattern(regex_node->as<ASTLiteral &>().value.get<String>());
|
||||
if (regexp_node)
|
||||
res->setPattern(regexp_node->as<ASTLiteral &>().value.get<String>());
|
||||
else
|
||||
res->children = std::move(identifiers);
|
||||
res->is_strict = is_strict;
|
||||
@ -1794,11 +1794,11 @@ static bool parseColumnsMatcherBody(IParser::Pos & pos, ASTPtr & node, Expected
|
||||
++pos;
|
||||
|
||||
ParserList columns_p(std::make_unique<ParserCompoundIdentifier>(false, true), std::make_unique<ParserToken>(TokenType::Comma), false);
|
||||
ParserStringLiteral regex;
|
||||
ParserStringLiteral regexp;
|
||||
|
||||
ASTPtr column_list;
|
||||
ASTPtr regex_node;
|
||||
if (!columns_p.parse(pos, column_list, expected) && !regex.parse(pos, regex_node, expected))
|
||||
ASTPtr regexp_node;
|
||||
if (!columns_p.parse(pos, column_list, expected) && !regexp.parse(pos, regexp_node, expected))
|
||||
return false;
|
||||
|
||||
if (pos->type != TokenType::ClosingRoundBracket)
|
||||
@ -1832,7 +1832,7 @@ static bool parseColumnsMatcherBody(IParser::Pos & pos, ASTPtr & node, Expected
|
||||
else
|
||||
{
|
||||
auto regexp_matcher = std::make_shared<ASTColumnsRegexpMatcher>();
|
||||
regexp_matcher->setPattern(regex_node->as<ASTLiteral &>().value.get<String>());
|
||||
regexp_matcher->setPattern(regexp_node->as<ASTLiteral &>().value.get<String>());
|
||||
|
||||
if (!transformers->children.empty())
|
||||
{
|
||||
@ -1895,8 +1895,7 @@ bool ParserQualifiedColumnsMatcher::parseImpl(Pos & pos, ASTPtr & node, Expected
|
||||
else if (auto * column_regexp_matcher = node->as<ASTColumnsRegexpMatcher>())
|
||||
{
|
||||
auto result = std::make_shared<ASTQualifiedColumnsRegexpMatcher>();
|
||||
result->setPattern(column_regexp_matcher->getPattern(), false);
|
||||
result->setMatcher(column_regexp_matcher->getMatcher());
|
||||
result->setPattern(column_regexp_matcher->getPattern());
|
||||
|
||||
result->qualifier = std::move(identifier_node);
|
||||
result->children.push_back(result->qualifier);
|
||||
|
@ -52,7 +52,7 @@ public:
|
||||
: string_to_string_regexp(settings_.avro.string_column_pattern)
|
||||
{
|
||||
if (!string_to_string_regexp.ok())
|
||||
throw DB::Exception(DB::ErrorCodes::CANNOT_COMPILE_REGEXP, "Avro: cannot compile re2: {}, error: {}. "
|
||||
throw Exception(ErrorCodes::CANNOT_COMPILE_REGEXP, "Avro: cannot compile re2: {}, error: {}. "
|
||||
"Look at https://github.com/google/re2/wiki/Syntax for reference.",
|
||||
settings_.avro.string_column_pattern, string_to_string_regexp.error());
|
||||
}
|
||||
|
@ -0,0 +1 @@
|
||||
SYNTAX_ERROR
|
9
tests/queries/0_stateless/03013_parser_regexp_recompilation.sh
Executable file
9
tests/queries/0_stateless/03013_parser_regexp_recompilation.sh
Executable file
@ -0,0 +1,9 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
|
||||
# shellcheck source=../shell_config.sh
|
||||
. "$CURDIR"/../shell_config.sh
|
||||
|
||||
# This query is incorrect, but its parsing should not be slow (tens of seconds):
|
||||
|
||||
${CLICKHOUSE_LOCAL} < "$CURDIR"/03013_parser_regexp_recompilation.txt 2>&1 | grep --text -o -F 'SYNTAX_ERROR'
|
BIN
tests/queries/0_stateless/03013_parser_regexp_recompilation.txt
Normal file
BIN
tests/queries/0_stateless/03013_parser_regexp_recompilation.txt
Normal file
Binary file not shown.
Loading…
Reference in New Issue
Block a user