parse tuple as literal if possible

This commit is contained in:
Anton Popov 2020-03-19 04:15:01 +03:00
parent 1c98210b71
commit f08771e563
5 changed files with 97 additions and 26 deletions

View File

@ -16,17 +16,21 @@ void ASTLiteral::updateTreeHashImpl(SipHash & hash_state) const
void ASTLiteral::appendColumnNameImpl(WriteBuffer & ostr) const
{
/// Special case for very large arrays. Instead of listing all elements, will use hash of them.
/// 100 - just arbitrary value.
constexpr auto MIN_ELEMENTS_FOR_HASHING = 100;
/// Special case for very large arrays and tuples. Instead of listing all elements, will use hash of them.
/// (Otherwise column name will be too long, that will lead to significant slowdown of expression analysis.)
if (value.getType() == Field::Types::Array
&& value.get<const Array &>().size() > 100) /// 100 - just arbitrary value.
auto type = value.getType();
if ((type == Field::Types::Array && value.get<const Array &>().size() > MIN_ELEMENTS_FOR_HASHING)
|| (type == Field::Types::Tuple && value.get<const Tuple &>().size() > MIN_ELEMENTS_FOR_HASHING))
{
SipHash hash;
applyVisitor(FieldVisitorHash(hash), value);
UInt64 low, high;
hash.get128(low, high);
writeCString("__array_", ostr);
writeCString(type == Field::Types::Array ? "__array_" : "__tuple_", ostr);
writeText(low, ostr);
ostr.write('_');
writeText(high, ostr);

View File

@ -990,15 +990,15 @@ bool ParserStringLiteral::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte
return true;
}
bool ParserArrayOfLiterals::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
template <typename Collection>
bool ParserCollectionOfLiterals<Collection>::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
{
if (pos->type != TokenType::OpeningSquareBracket)
if (pos->type != opening_bracket)
return false;
Pos literal_begin = pos;
Array arr;
Collection arr;
ParserLiteral literal_p;
@ -1008,9 +1008,16 @@ bool ParserArrayOfLiterals::parseImpl(Pos & pos, ASTPtr & node, Expected & expec
{
if (!arr.empty())
{
if (pos->type == TokenType::ClosingSquareBracket)
if (pos->type == closing_bracket)
{
auto literal = std::make_shared<ASTLiteral>(arr);
std::shared_ptr<ASTLiteral> literal;
/// Parse one-element tuples (e.g. (1)) as single values for backward compatibility.
if (std::is_same_v<Collection, Tuple> && arr.size() == 1)
literal = std::make_shared<ASTLiteral>(arr[0]);
else
literal = std::make_shared<ASTLiteral>(arr);
literal->begin = literal_begin;
literal->end = ++pos;
node = literal;
@ -1022,7 +1029,9 @@ bool ParserArrayOfLiterals::parseImpl(Pos & pos, ASTPtr & node, Expected & expec
}
else
{
expected.add(pos, "comma or closing square bracket");
std::stringstream msg;
msg << "comma or " << getTokenName(closing_bracket);
expected.add(pos, msg.str().c_str());
return false;
}
}
@ -1034,7 +1043,7 @@ bool ParserArrayOfLiterals::parseImpl(Pos & pos, ASTPtr & node, Expected & expec
arr.push_back(literal_node->as<ASTLiteral &>().value);
}
expected.add(pos, "closing square bracket");
expected.add(pos, getTokenName(closing_bracket));
return false;
}
@ -1235,6 +1244,7 @@ bool ParserSubstitution::parseImpl(Pos & pos, ASTPtr & node, Expected & expected
bool ParserExpressionElement::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
{
return ParserSubquery().parse(pos, node, expected)
|| ParserTupleOfLiterals().parse(pos, node, expected)
|| ParserParenthesisExpression().parse(pos, node, expected)
|| ParserArrayOfLiterals().parse(pos, node, expected)
|| ParserArray().parse(pos, node, expected)

View File

@ -1,5 +1,6 @@
#pragma once
#include <Core/Field.h>
#include <Parsers/IParserBase.h>
@ -217,17 +218,49 @@ protected:
};
/** An array of literals.
* Arrays can also be parsed as an application of [] operator.
* But parsing the whole array as a whole constant seriously speeds up the analysis of expressions in the case of very large arrays.
* We try to parse the array as an array of literals first (fast path),
* and if it did not work out (when the array consists of complex expressions) - parse as an application of [] operator (slow path).
/** An array or tuple of literals.
* Arrays can also be parsed as an application of [] operator and tuples as an application of 'tuple' function.
* But parsing the whole array/tuple as a single constant seriously speeds up the analysis of expressions in the case of very large collections.
* We try to parse the array or tuple as a collection of literals first (fast path),
* and if it did not work out (when the collection consists of complex expressions) -
* parse as an application of [] operator or 'tuple' function (slow path).
*/
template <typename Collection>
class ParserCollectionOfLiterals : public IParserBase
{
public:
ParserCollectionOfLiterals(TokenType opening_bracket_, TokenType closing_bracket_)
: opening_bracket(opening_bracket_), closing_bracket(closing_bracket_) {}
protected:
const char * getName() const override { return "collection of literals"; }
bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
private:
TokenType opening_bracket;
TokenType closing_bracket;
};
/// Parser for a round-bracketed list of literals.
/// All the work is forwarded to ParserCollectionOfLiterals<Tuple>.
class ParserTupleOfLiterals : public IParserBase
{
public:
    /// Underlying collection parser, configured with the round-bracket token pair.
    ParserCollectionOfLiterals<Tuple> tuple_parser{
        TokenType::OpeningRoundBracket,
        TokenType::ClosingRoundBracket};

protected:
    const char * getName() const override
    {
        return "tuple";
    }

    /// Delegates to tuple_parser and hands its result back unchanged.
    bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override
    {
        return tuple_parser.parse(pos, node, expected);
    }
};
/// Parser for a square-bracketed array of literals.
/// All the work is forwarded to ParserCollectionOfLiterals<Array>.
class ParserArrayOfLiterals : public IParserBase
{
public:
/// Underlying collection parser, configured with the square-bracket token pair.
ParserCollectionOfLiterals<Array> array_parser{TokenType::OpeningSquareBracket, TokenType::ClosingSquareBracket};
protected:
const char * getName() const override { return "array"; }
/// NOTE(review): this declaration-only line looks like the removed side of the diff hunk;
/// the inline definition right below appears to replace it — confirm against the actual header.
bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
/// Delegates to array_parser and returns its result unchanged.
bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override
{
return array_parser.parse(pos, node, expected);
}
};

View File

@ -38,14 +38,34 @@ bool ParserPartition::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
String fields_str;
const auto * tuple_ast = value->as<ASTFunction>();
bool surrounded_by_parens = false;
if (tuple_ast && tuple_ast->name == "tuple")
{
surrounded_by_parens = true;
const auto * arguments_ast = tuple_ast->arguments->as<ASTExpressionList>();
if (arguments_ast)
fields_count = arguments_ast->children.size();
else
fields_count = 0;
}
else if (auto literal = value->as<ASTLiteral>())
{
if (literal->value.getType() == Field::Types::Tuple)
{
surrounded_by_parens = true;
fields_count = literal->value.get<Tuple &>().size();
}
else
{
fields_count = 1;
fields_str = String(begin->begin, pos->begin - begin->begin);
}
}
else
return false;
if (surrounded_by_parens)
{
Pos left_paren = begin;
Pos right_paren = pos;
@ -61,13 +81,6 @@ bool ParserPartition::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
fields_str = String(left_paren->end, right_paren->begin - left_paren->end);
}
else if (value->as<ASTLiteral>())
{
fields_count = 1;
fields_str = String(begin->begin, pos->begin - begin->begin);
}
else
return false;
partition->value = value;
partition->children.push_back(value);

File diff suppressed because one or more lines are too long