Fix column matcher and column transformer

This commit is contained in:
Amos Bird 2022-04-20 01:22:04 +08:00
parent 38be34726c
commit 158a25d5fa
No known key found for this signature in database
GPG Key ID: 80D430DCBECFEDB4
12 changed files with 358 additions and 90 deletions

View File

@ -12,6 +12,7 @@
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTAsterisk.h>
#include <Parsers/ASTColumnsMatcher.h>
#include <Parsers/ASTColumnsTransformers.h>
#include <Parsers/ASTQualifiedAsterisk.h>
#include <Parsers/ParserTablesInSelectQuery.h>
#include <Parsers/ExpressionListParsers.h>
@ -81,6 +82,7 @@ public:
/// By default should_add_column_predicate returns true for any column name
void addTableColumns(
const String & table_name,
ASTs & columns,
ShouldAddColumnPredicate should_add_column_predicate = [](const String &) { return true; })
{
auto it = table_columns.find(table_name);
@ -105,7 +107,7 @@ public:
else
identifier = std::make_shared<ASTIdentifier>(std::vector<String>{it->first, column.name});
new_select_expression_list->children.emplace_back(std::move(identifier));
columns.emplace_back(std::move(identifier));
}
}
}
@ -129,14 +131,18 @@ private:
for (const auto & child : node.children)
{
if (child->as<ASTAsterisk>())
ASTs columns;
if (const auto * asterisk = child->as<ASTAsterisk>())
{
has_asterisks = true;
for (auto & table_name : data.tables_order)
data.addTableColumns(table_name);
data.addTableColumns(table_name, columns);
for (const auto & transformer : asterisk->children)
IASTColumnsTransformer::transform(transformer, columns);
}
else if (child->as<ASTQualifiedAsterisk>())
else if (const auto * qualified_asterisk = child->as<ASTQualifiedAsterisk>())
{
has_asterisks = true;
@ -144,17 +150,44 @@ private:
throw Exception("Logical error: qualified asterisk must have exactly one child", ErrorCodes::LOGICAL_ERROR);
auto & identifier = child->children[0]->as<ASTTableIdentifier &>();
data.addTableColumns(identifier.name());
data.addTableColumns(identifier.name(), columns);
// QualifiedAsterisk's transformers start to appear at child 1
for (auto it = qualified_asterisk->children.begin() + 1; it != qualified_asterisk->children.end(); ++it)
{
IASTColumnsTransformer::transform(*it, columns);
}
else if (auto * columns_matcher = child->as<ASTColumnsMatcher>())
}
else if (const auto * columns_list_matcher = child->as<ASTColumnsListMatcher>())
{
has_asterisks = true;
for (const auto & ident : columns_list_matcher->column_list->children)
columns.emplace_back(ident->clone());
for (const auto & transformer : columns_list_matcher->children)
IASTColumnsTransformer::transform(transformer, columns);
}
else if (const auto * columns_regexp_matcher = child->as<ASTColumnsRegexpMatcher>())
{
has_asterisks = true;
for (auto & table_name : data.tables_order)
data.addTableColumns(table_name, [&](const String & column_name) { return columns_matcher->isColumnMatching(column_name); });
data.addTableColumns(
table_name,
columns,
[&](const String & column_name) { return columns_regexp_matcher->isColumnMatching(column_name); });
for (const auto & transformer : columns_regexp_matcher->children)
IASTColumnsTransformer::transform(transformer, columns);
}
else
data.new_select_expression_list->children.push_back(child);
data.new_select_expression_list->children.insert(
data.new_select_expression_list->children.end(),
std::make_move_iterator(columns.begin()),
std::make_move_iterator(columns.end()));
}
if (!has_asterisks)

View File

@ -96,8 +96,8 @@ void PredicateRewriteVisitorData::visitOtherInternalSelect(ASTSelectQuery & sele
size_t alias_index = 0;
for (auto & ref_select : temp_select_query->refSelect()->children)
{
if (!ref_select->as<ASTAsterisk>() && !ref_select->as<ASTQualifiedAsterisk>() && !ref_select->as<ASTColumnsMatcher>() &&
!ref_select->as<ASTIdentifier>())
if (!ref_select->as<ASTAsterisk>() && !ref_select->as<ASTQualifiedAsterisk>() && !ref_select->as<ASTColumnsListMatcher>()
&& !ref_select->as<ASTColumnsRegexpMatcher>() && !ref_select->as<ASTIdentifier>())
{
if (const auto & alias = ref_select->tryGetAlias(); alias.empty())
ref_select->setAlias("--predicate_optimizer_" + toString(alias_index++));

View File

@ -196,7 +196,7 @@ void TranslateQualifiedNamesMatcher::visit(ASTExpressionList & node, const ASTPt
bool has_asterisk = false;
for (const auto & child : node.children)
{
if (child->as<ASTAsterisk>() || child->as<ASTColumnsMatcher>())
if (child->as<ASTAsterisk>() || child->as<ASTColumnsListMatcher>() || child->as<ASTColumnsRegexpMatcher>())
{
if (tables_with_columns.empty())
throw Exception("An asterisk cannot be replaced with empty columns.", ErrorCodes::LOGICAL_ERROR);
@ -229,47 +229,38 @@ void TranslateQualifiedNamesMatcher::visit(ASTExpressionList & node, const ASTPt
for (const auto & column : *cols)
{
if (first_table || !data.join_using_columns.count(column.name))
{
addIdentifier(columns, table.table, column.name);
}
}
}
first_table = false;
}
for (const auto & transformer : asterisk->children)
{
IASTColumnsTransformer::transform(transformer, columns);
}
}
else if (const auto * asterisk_pattern = child->as<ASTColumnsMatcher>())
else if (auto * asterisk_column_list = child->as<ASTColumnsListMatcher>())
{
if (asterisk_pattern->column_list)
{
for (const auto & ident : asterisk_pattern->column_list->children)
for (const auto & ident : asterisk_column_list->column_list->children)
columns.emplace_back(ident->clone());
for (const auto & transformer : asterisk_column_list->children)
IASTColumnsTransformer::transform(transformer, columns);
}
else
else if (const auto * asterisk_regexp_pattern = child->as<ASTColumnsRegexpMatcher>())
{
bool first_table = true;
for (const auto & table : tables_with_columns)
{
for (const auto & column : table.columns)
{
if (asterisk_pattern->isColumnMatching(column.name) && (first_table || !data.join_using_columns.count(column.name)))
{
if (asterisk_regexp_pattern->isColumnMatching(column.name) && (first_table || !data.join_using_columns.count(column.name)))
addIdentifier(columns, table.table, column.name);
}
}
first_table = false;
}
}
// ColumnsMatcher's transformers start to appear at child 1
for (auto it = asterisk_pattern->children.begin() + 1; it != asterisk_pattern->children.end(); ++it)
{
IASTColumnsTransformer::transform(*it, columns);
}
for (const auto & transformer : asterisk_regexp_pattern->children)
IASTColumnsTransformer::transform(transformer, columns);
}
else if (const auto * qualified_asterisk = child->as<ASTQualifiedAsterisk>())
{
@ -280,12 +271,11 @@ void TranslateQualifiedNamesMatcher::visit(ASTExpressionList & node, const ASTPt
if (ident_db_and_name.satisfies(table.table, true))
{
for (const auto & column : table.columns)
{
addIdentifier(columns, table.table, column.name);
}
break;
}
}
// QualifiedAsterisk's transformers start to appear at child 1
for (auto it = qualified_asterisk->children.begin() + 1; it != qualified_asterisk->children.end(); ++it)
{

View File

@ -17,6 +17,8 @@ void ASTAsterisk::appendColumnName(WriteBuffer & ostr) const { ostr.write('*');
void ASTAsterisk::formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const
{
settings.ostr << "*";
/// Format column transformers
for (const auto & child : children)
{
settings.ostr << ' ';

View File

@ -1,64 +1,117 @@
#include "ASTColumnsMatcher.h"
#include <Parsers/ASTColumnsMatcher.h>
#include <IO/Operators.h>
#include <IO/WriteHelpers.h>
#include <Common/quoteString.h>
#include <re2/re2.h>
#include <Common/SipHash.h>
#include <IO/Operators.h>
#include <Common/quoteString.h>
namespace DB
{
namespace ErrorCodes
{
extern const int CANNOT_COMPILE_REGEXP;
}
/// Returns a copy of this matcher node: the node is copy-constructed from *this
/// and cloneChildren() is then invoked on the copy before it is returned.
ASTPtr ASTColumnsMatcher::clone() const
ASTPtr ASTColumnsRegexpMatcher::clone() const
{
auto clone = std::make_shared<ASTColumnsMatcher>(*this);
auto clone = std::make_shared<ASTColumnsRegexpMatcher>(*this);
clone->cloneChildren();
return clone;
}
void ASTColumnsMatcher::appendColumnName(WriteBuffer & ostr) const { writeString(original_pattern, ostr); }
/// Writes the column name as `COLUMNS(`, the quoted original pattern, and a closing `)`.
void ASTColumnsRegexpMatcher::appendColumnName(WriteBuffer & ostr) const
{
writeCString("COLUMNS(", ostr);
writeQuotedString(original_pattern, ostr);
writeChar(')', ostr);
}
/// Adds this node's own state to the tree hash: the pattern length, the pattern
/// text, and finally whatever the base IAST implementation contributes.
void ASTColumnsMatcher::updateTreeHashImpl(SipHash & hash_state) const
void ASTColumnsRegexpMatcher::updateTreeHashImpl(SipHash & hash_state) const
{
/// Length is hashed before the bytes (presumably to keep variable-length fields unambiguous; confirm).
hash_state.update(original_pattern.size());
hash_state.update(original_pattern);
IAST::updateTreeHashImpl(hash_state);
}
/// Formats the matcher back to SQL text: the (optionally highlighted) COLUMNS keyword,
/// the parenthesised argument, then every attached transformer preceded by a single space.
void ASTColumnsMatcher::formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const
void ASTColumnsRegexpMatcher::formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const
{
settings.ostr << (settings.hilite ? hilite_keyword : "") << "COLUMNS" << (settings.hilite ? hilite_none : "") << "(";
if (column_list)
{
frame.expression_list_prepend_whitespace = false;
column_list->formatImpl(settings, state, frame);
}
else
settings.ostr << quoteString(original_pattern);
settings.ostr << ")";
for (ASTs::const_iterator it = children.begin() + 1; it != children.end(); ++it)
/// Format column transformers
for (const auto & child : children)
{
settings.ostr << ' ';
(*it)->formatImpl(settings, state, frame);
child->formatImpl(settings, state, frame);
}
}
/// Stores the pattern text and compiles it into an RE2 object.
/// Throws DB::Exception with code CANNOT_COMPILE_REGEXP when the compiled object reports !ok();
/// the message includes both the pattern and RE2's error text.
void ASTColumnsMatcher::setPattern(String pattern)
void ASTColumnsRegexpMatcher::setPattern(String pattern)
{
original_pattern = std::move(pattern);
/// RE2::Quiet: per the RE2 documentation, parse errors are not logged by the library;
/// failure is detected through ok() / error() below instead.
column_matcher = std::make_shared<RE2>(original_pattern, RE2::Quiet);
if (!column_matcher->ok())
throw DB::Exception("COLUMNS pattern " + original_pattern + " cannot be compiled: " + column_matcher->error(), DB::ErrorCodes::CANNOT_COMPILE_REGEXP);
throw DB::Exception(
"COLUMNS pattern " + original_pattern + " cannot be compiled: " + column_matcher->error(),
DB::ErrorCodes::CANNOT_COMPILE_REGEXP);
}
/// Returns whether the compiled pattern matches `column_name`.
/// Uses RE2::PartialMatch, which per the RE2 documentation is an unanchored (substring) match.
/// NOTE(review): dereferences column_matcher unconditionally — assumes setPattern() was called first; confirm with callers.
bool ASTColumnsMatcher::isColumnMatching(const String & column_name) const
bool ASTColumnsRegexpMatcher::isColumnMatching(const String & column_name) const
{
return RE2::PartialMatch(column_name, *column_matcher);
}
/// Produces a copy of this list matcher.
/// `column_list` is a separate member, so it is cloned explicitly; the transformers
/// stored in `children` are handled by cloneChildren().
ASTPtr ASTColumnsListMatcher::clone() const
{
    auto copy = std::make_shared<ASTColumnsListMatcher>(*this);

    /// The copy constructor duplicated only the pointer; replace it with a clone of the list.
    copy->column_list = column_list->clone();
    copy->cloneChildren();

    return copy;
}
/// Adds this node's own state to the tree hash: first the column list
/// (through its updateTreeHash), then the base IAST contribution.
void ASTColumnsListMatcher::updateTreeHashImpl(SipHash & hash_state) const
{
/// column_list is a dedicated member, so it is hashed explicitly here.
column_list->updateTreeHash(hash_state);
IAST::updateTreeHashImpl(hash_state);
}
/// Writes the column name as `COLUMNS(a, b, ...)`, where each entry is the
/// column name of the corresponding element of `column_list`.
void ASTColumnsListMatcher::appendColumnName(WriteBuffer & ostr) const
{
    writeCString("COLUMNS(", ostr);

    bool is_first = true;
    for (const auto & column : column_list->children)
    {
        /// Separator goes between entries only, never before the first one.
        if (!is_first)
            writeCString(", ", ostr);
        is_first = false;

        column->appendColumnName(ostr);
    }

    writeChar(')', ostr);
}
void ASTColumnsListMatcher::formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const
{
settings.ostr << (settings.hilite ? hilite_keyword : "") << "COLUMNS" << (settings.hilite ? hilite_none : "") << "(";
for (ASTs::const_iterator it = column_list->children.begin(); it != column_list->children.end(); ++it)
{
if (it != column_list->children.begin())
{
settings.ostr << ", ";
}
(*it)->formatImpl(settings, state, frame);
}
settings.ostr << ")";
/// Format column transformers
for (const auto & child : children)
{
settings.ostr << ' ';
child->formatImpl(settings, state, frame);
}
}
}

View File

@ -2,10 +2,9 @@
#include <Parsers/IAST.h>
namespace re2
{
class RE2;
class RE2;
}
@ -14,21 +13,13 @@ namespace DB
class WriteBuffer;
namespace ErrorCodes
{
}
struct AsteriskSemantic;
struct AsteriskSemanticImpl;
/** SELECT COLUMNS('regexp') is expanded to multiple columns like * (asterisk).
* Optional transformers can be attached to further manipulate these expanded columns.
*/
class ASTColumnsMatcher : public IAST
class ASTColumnsRegexpMatcher : public IAST
{
public:
String getID(char) const override { return "ColumnsMatcher"; }
String getID(char) const override { return "ColumnsRegexpMatcher"; }
ASTPtr clone() const override;
void appendColumnName(WriteBuffer & ostr) const override;
@ -36,17 +27,26 @@ public:
bool isColumnMatching(const String & column_name) const;
void updateTreeHashImpl(SipHash & hash_state) const override;
ASTPtr column_list;
protected:
void formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override;
private:
std::shared_ptr<re2::RE2> column_matcher;
String original_pattern;
std::shared_ptr<AsteriskSemanticImpl> semantic; /// pimpl
};
friend struct AsteriskSemantic;
/** SELECT COLUMNS(col1, col2, ...) — a columns matcher that is given an explicit
  * list of columns instead of a regular expression.
  * Transformers attached to the matcher are kept in `children`; the column list
  * itself is kept separately in `column_list`.
  */
class ASTColumnsListMatcher : public IAST
{
public:
String getID(char) const override { return "ColumnsListMatcher"; }
ASTPtr clone() const override;
void appendColumnName(WriteBuffer & ostr) const override;
void updateTreeHashImpl(SipHash & hash_state) const override;
/// The listed columns; cloned and hashed explicitly by clone() / updateTreeHashImpl().
ASTPtr column_list;
protected:
void formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override;
};

View File

@ -105,6 +105,49 @@ void ASTColumnsApplyTransformer::transform(ASTs & nodes) const
}
}
/// Writes the column name of an APPLY transformer.
/// Without a column name prefix: `APPLY <lambda>` or `APPLY <func_name><parameters>`.
/// With a prefix the body is wrapped: `APPLY (<body>, '<prefix>')`.
void ASTColumnsApplyTransformer::appendColumnName(WriteBuffer & ostr) const
{
    const bool has_prefix = !column_name_prefix.empty();

    writeCString("APPLY ", ostr);
    if (has_prefix)
        writeChar('(', ostr);

    if (!lambda)
    {
        /// Plain function form, optionally followed by its parameters.
        writeString(func_name, ostr);
        if (parameters)
            parameters->appendColumnName(ostr);
    }
    else
    {
        lambda->appendColumnName(ostr);
    }

    if (!has_prefix)
        return;

    writeCString(", '", ostr);
    writeString(column_name_prefix, ostr);
    writeCString("')", ostr);
}
/// Adds this transformer's own state to the tree hash: the function name, the
/// parameters and lambda subtrees (when present), the lambda argument name, the
/// column name prefix, and finally the base IAST contribution.
/// Every string is preceded by its length.
void ASTColumnsApplyTransformer::updateTreeHashImpl(SipHash & hash_state) const
{
    hash_state.update(func_name.size());
    hash_state.update(func_name);

    /// `parameters` and `lambda` are held as members, so they are hashed explicitly here.
    /// Use updateTreeHash() rather than updateTreeHashImpl(): the latter covers only the
    /// node itself, so e.g. the literal inside `quantile(0.9)` would not reach the hash.
    /// This matches how ASTColumnsListMatcher hashes its `column_list`.
    /// NOTE(review): assumes these nodes are not also stored in `children`; if they are,
    /// they are merely hashed twice, which is harmless.
    if (parameters)
        parameters->updateTreeHash(hash_state);
    if (lambda)
        lambda->updateTreeHash(hash_state);

    hash_state.update(lambda_arg.size());
    hash_state.update(lambda_arg);

    hash_state.update(column_name_prefix.size());
    hash_state.update(column_name_prefix);

    IAST::updateTreeHashImpl(hash_state);
}
void ASTColumnsExceptTransformer::formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const
{
settings.ostr << (settings.hilite ? hilite_keyword : "") << "EXCEPT" << (is_strict ? " STRICT " : " ") << (settings.hilite ? hilite_none : "");
@ -128,6 +171,38 @@ void ASTColumnsExceptTransformer::formatImpl(const FormatSettings & settings, Fo
settings.ostr << ")";
}
/// Writes the column name of an EXCEPT transformer: `EXCEPT [STRICT ]` followed by
/// the comma-separated child column names and, if a pattern is set, the quoted pattern.
/// The list is parenthesised only when there is more than one child.
void ASTColumnsExceptTransformer::appendColumnName(WriteBuffer & ostr) const
{
    const bool parenthesized = children.size() > 1;

    writeCString("EXCEPT ", ostr);
    if (is_strict)
        writeCString("STRICT ", ostr);

    if (parenthesized)
        writeChar('(', ostr);

    bool is_first = true;
    for (const auto & column : children)
    {
        if (!is_first)
            writeCString(", ", ostr);
        is_first = false;

        column->appendColumnName(ostr);
    }

    if (!original_pattern.empty())
        writeQuotedString(original_pattern, ostr);

    if (parenthesized)
        writeChar(')', ostr);
}
/// Adds this transformer's own state to the tree hash: the strictness flag,
/// the pattern (length first, then text), and the base IAST contribution.
void ASTColumnsExceptTransformer::updateTreeHashImpl(SipHash & hash_state) const
{
    hash_state.update(is_strict);

    const auto pattern_length = original_pattern.size();
    hash_state.update(pattern_length);
    hash_state.update(original_pattern);

    IAST::updateTreeHashImpl(hash_state);
}
void ASTColumnsExceptTransformer::transform(ASTs & nodes) const
{
std::set<String> expected_columns;
@ -201,6 +276,21 @@ void ASTColumnsReplaceTransformer::Replacement::formatImpl(
settings.ostr << (settings.hilite ? hilite_keyword : "") << " AS " << (settings.hilite ? hilite_none : "") << backQuoteIfNeed(name);
}
/// Writes a single replacement as `<expr column name> AS <name>`,
/// with the name written through writeProbablyBackQuotedString.
void ASTColumnsReplaceTransformer::Replacement::appendColumnName(WriteBuffer & ostr) const
{
expr->appendColumnName(ostr);
writeCString(" AS ", ostr);
writeProbablyBackQuotedString(name, ostr);
}
/// Adds this replacement's own state to the tree hash: the target name
/// (length first, then text), the expression node, and the base IAST contribution.
void ASTColumnsReplaceTransformer::Replacement::updateTreeHashImpl(SipHash & hash_state) const
{
hash_state.update(name.size());
hash_state.update(name);
/// NOTE(review): this calls the Impl variant on expr, unlike ASTColumnsListMatcher, which
/// uses updateTreeHash for its member — presumably fine if expr is also in `children`; confirm.
expr->updateTreeHashImpl(hash_state);
IAST::updateTreeHashImpl(hash_state);
}
void ASTColumnsReplaceTransformer::formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const
{
settings.ostr << (settings.hilite ? hilite_keyword : "") << "REPLACE" << (is_strict ? " STRICT " : " ") << (settings.hilite ? hilite_none : "");
@ -211,9 +301,8 @@ void ASTColumnsReplaceTransformer::formatImpl(const FormatSettings & settings, F
for (ASTs::const_iterator it = children.begin(); it != children.end(); ++it)
{
if (it != children.begin())
{
settings.ostr << ", ";
}
(*it)->formatImpl(settings, state, frame);
}
@ -221,6 +310,32 @@ void ASTColumnsReplaceTransformer::formatImpl(const FormatSettings & settings, F
settings.ostr << ")";
}
/// Writes the column name of a REPLACE transformer: `REPLACE [STRICT ]` followed by
/// the comma-separated column names of its children.
/// The list is parenthesised only when there is more than one child.
void ASTColumnsReplaceTransformer::appendColumnName(WriteBuffer & ostr) const
{
    const bool parenthesized = children.size() > 1;

    writeCString("REPLACE ", ostr);
    if (is_strict)
        writeCString("STRICT ", ostr);

    if (parenthesized)
        writeChar('(', ostr);

    bool is_first = true;
    for (const auto & replacement : children)
    {
        if (!is_first)
            writeCString(", ", ostr);
        is_first = false;

        replacement->appendColumnName(ostr);
    }

    if (parenthesized)
        writeChar(')', ostr);
}
/// Adds this transformer's own state to the tree hash: the strictness flag
/// followed by the base IAST contribution. No other members are hashed here.
void ASTColumnsReplaceTransformer::updateTreeHashImpl(SipHash & hash_state) const
{
hash_state.update(is_strict);
IAST::updateTreeHashImpl(hash_state);
}
void ASTColumnsReplaceTransformer::replaceChildren(ASTPtr & node, const ASTPtr & replacement, const String & name)
{
for (auto & child : node->children)

View File

@ -30,6 +30,8 @@ public:
return res;
}
void transform(ASTs & nodes) const override;
void appendColumnName(WriteBuffer & ostr) const override;
void updateTreeHashImpl(SipHash & hash_state) const override;
// Case 1 APPLY (quantile(0.9))
String func_name;
@ -59,6 +61,8 @@ public:
void transform(ASTs & nodes) const override;
void setPattern(String pattern);
bool isColumnMatching(const String & column_name) const;
void appendColumnName(WriteBuffer & ostr) const override;
void updateTreeHashImpl(SipHash & hash_state) const override;
protected:
void formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override;
@ -76,12 +80,13 @@ public:
ASTPtr clone() const override
{
auto replacement = std::make_shared<Replacement>(*this);
replacement->children.clear();
replacement->expr = expr->clone();
replacement->children.push_back(replacement->expr);
return replacement;
}
void appendColumnName(WriteBuffer & ostr) const override;
void updateTreeHashImpl(SipHash & hash_state) const override;
String name;
ASTPtr expr;
@ -98,6 +103,8 @@ public:
return clone;
}
void transform(ASTs & nodes) const override;
void appendColumnName(WriteBuffer & ostr) const override;
void updateTreeHashImpl(SipHash & hash_state) const override;
protected:
void formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override;

View File

@ -17,6 +17,8 @@ void ASTQualifiedAsterisk::formatImpl(const FormatSettings & settings, FormatSta
const auto & qualifier = children.at(0);
qualifier->formatImpl(settings, state, frame);
settings.ostr << ".*";
/// Format column transformers
for (ASTs::const_iterator it = children.begin() + 1; it != children.end(); ++it)
{
settings.ostr << ' ';

View File

@ -1796,16 +1796,18 @@ bool ParserColumnsMatcher::parseImpl(Pos & pos, ASTPtr & node, Expected & expect
return false;
++pos;
auto res = std::make_shared<ASTColumnsMatcher>();
ASTPtr res;
if (column_list)
{
res->column_list = column_list;
res->children.push_back(res->column_list);
auto list_matcher = std::make_shared<ASTColumnsListMatcher>();
list_matcher->column_list = column_list;
res = list_matcher;
}
else
{
res->setPattern(regex_node->as<ASTLiteral &>().value.get<String>());
res->children.push_back(regex_node);
auto regexp_matcher = std::make_shared<ASTColumnsRegexpMatcher>();
regexp_matcher->setPattern(regex_node->as<ASTLiteral &>().value.get<String>());
res = regexp_matcher;
}
ParserColumnsTransformers transformers_p(allowed_transformers);

View File

@ -0,0 +1,64 @@
DROP TABLE IF EXISTS github_events;
CREATE TABLE github_events
(
`file_time` DateTime,
`event_type` Enum8('CommitCommentEvent' = 1, 'CreateEvent' = 2, 'DeleteEvent' = 3, 'ForkEvent' = 4, 'GollumEvent' = 5, 'IssueCommentEvent' = 6, 'IssuesEvent' = 7, 'MemberEvent' = 8, 'PublicEvent' = 9, 'PullRequestEvent' = 10, 'PullRequestReviewCommentEvent' = 11, 'PushEvent' = 12, 'ReleaseEvent' = 13, 'SponsorshipEvent' = 14, 'WatchEvent' = 15, 'GistEvent' = 16, 'FollowEvent' = 17, 'DownloadEvent' = 18, 'PullRequestReviewEvent' = 19, 'ForkApplyEvent' = 20, 'Event' = 21, 'TeamAddEvent' = 22),
`actor_login` LowCardinality(String),
`repo_name` LowCardinality(String),
`created_at` DateTime,
`updated_at` DateTime,
`action` Enum8('none' = 0, 'created' = 1, 'added' = 2, 'edited' = 3, 'deleted' = 4, 'opened' = 5, 'closed' = 6, 'reopened' = 7, 'assigned' = 8, 'unassigned' = 9, 'labeled' = 10, 'unlabeled' = 11, 'review_requested' = 12, 'review_request_removed' = 13, 'synchronize' = 14, 'started' = 15, 'published' = 16, 'update' = 17, 'create' = 18, 'fork' = 19, 'merged' = 20),
`comment_id` UInt64,
`body` String,
`path` String,
`position` Int32,
`line` Int32,
`ref` LowCardinality(String),
`ref_type` Enum8('none' = 0, 'branch' = 1, 'tag' = 2, 'repository' = 3, 'unknown' = 4),
`creator_user_login` LowCardinality(String),
`number` UInt32,
`title` String,
`labels` Array(LowCardinality(String)),
`state` Enum8('none' = 0, 'open' = 1, 'closed' = 2),
`locked` UInt8,
`assignee` LowCardinality(String),
`assignees` Array(LowCardinality(String)),
`comments` UInt32,
`author_association` Enum8('NONE' = 0, 'CONTRIBUTOR' = 1, 'OWNER' = 2, 'COLLABORATOR' = 3, 'MEMBER' = 4, 'MANNEQUIN' = 5),
`closed_at` DateTime,
`merged_at` DateTime,
`merge_commit_sha` String,
`requested_reviewers` Array(LowCardinality(String)),
`requested_teams` Array(LowCardinality(String)),
`head_ref` LowCardinality(String),
`head_sha` String,
`base_ref` LowCardinality(String),
`base_sha` String,
`merged` UInt8,
`mergeable` UInt8,
`rebaseable` UInt8,
`mergeable_state` Enum8('unknown' = 0, 'dirty' = 1, 'clean' = 2, 'unstable' = 3, 'draft' = 4),
`merged_by` LowCardinality(String),
`review_comments` UInt32,
`maintainer_can_modify` UInt8,
`commits` UInt32,
`additions` UInt32,
`deletions` UInt32,
`changed_files` UInt32,
`diff_hunk` String,
`original_position` UInt32,
`commit_id` String,
`original_commit_id` String,
`push_size` UInt32,
`push_distinct_size` UInt32,
`member_login` LowCardinality(String),
`release_tag_name` String,
`release_name` String,
`review_state` Enum8('none' = 0, 'approved' = 1, 'changes_requested' = 2, 'commented' = 3, 'dismissed' = 4, 'pending' = 5)
)
ENGINE = MergeTree ORDER BY (event_type, repo_name, created_at);
-- Regression query: the final SELECT uses the list form `columns(count)` over a three-way join of CTEs.
-- None of the CTEs defines a column named `count` (only count_last_day / count_last_week / count_last_month),
-- so the query is expected to fail with server error 47 (presumably UNKNOWN_IDENTIFIER — confirm against ErrorCodes).
with top_repos as ( select repo_name from github_events where event_type = 'WatchEvent' and toDate(created_at) = today() - 1 group by repo_name order by count() desc limit 100 union distinct select repo_name from github_events where event_type = 'WatchEvent' and toMonday(created_at) = toMonday(today() - interval 1 week) group by repo_name order by count() desc limit 100 union distinct select repo_name from github_events where event_type = 'WatchEvent' and toStartOfMonth(created_at) = toStartOfMonth(today()) - interval 1 month group by repo_name order by count() desc limit 100 union distinct select repo_name from github_events where event_type = 'WatchEvent' and toYear(created_at) = toYear(today()) - 1 group by repo_name order by count() desc limit 100 ), last_day as ( select repo_name, count() as count_last_day, rowNumberInAllBlocks() + 1 as position_last_day from github_events where repo_name in (select repo_name from top_repos) and toDate(created_at) = today() - 1 group by repo_name order by count_last_day desc ), last_week as ( select repo_name, count() as count_last_week, rowNumberInAllBlocks() + 1 as position_last_week from github_events where repo_name in (select repo_name from top_repos) and toMonday(created_at) = toMonday(today()) - interval 1 week group by repo_name order by count_last_week desc ), last_month as ( select repo_name, count() as count_last_month, rowNumberInAllBlocks() + 1 as position_last_month from github_events where repo_name in (select repo_name from top_repos) and toStartOfMonth(created_at) = toStartOfMonth(today()) - interval 1 month group by repo_name order by count_last_month desc ) select d.repo_name, columns(count) from last_day d join last_week w on d.repo_name = w.repo_name join last_month m on d.repo_name = m.repo_name FORMAT TabSeparatedWithNamesAndTypes; -- { serverError 47 }
DROP TABLE github_events;