ClickHouse/src/Storages/WindowView/StorageWindowView.cpp

1258 lines
47 KiB
C++
Raw Normal View History

#include <numeric>
#include <regex>
2020-03-01 18:08:52 +00:00
#include <DataStreams/ExpressionBlockInputStream.h>
2020-01-14 03:07:31 +00:00
#include <DataStreams/IBlockOutputStream.h>
#include <DataStreams/MaterializingBlockInputStream.h>
#include <DataStreams/SquashingBlockInputStream.h>
#include <DataStreams/copyData.h>
#include <DataTypes/DataTypeDateTime.h>
#include <DataTypes/DataTypesNumber.h>
2020-02-12 17:39:57 +00:00
#include <Functions/FunctionFactory.h>
2020-01-14 03:07:31 +00:00
#include <Functions/FunctionsWindow.h>
#include <Interpreters/AddDefaultDatabaseVisitor.h>
2020-06-09 08:48:04 +00:00
#include <Interpreters/Context.h>
2020-01-14 03:07:31 +00:00
#include <Interpreters/InDepthNodeVisitor.h>
2020-02-12 17:39:57 +00:00
#include <Interpreters/InterpreterAlterQuery.h>
2020-01-14 03:07:31 +00:00
#include <Interpreters/InterpreterDropQuery.h>
#include <Interpreters/QueryAliasesVisitor.h>
2020-01-14 03:07:31 +00:00
#include <Interpreters/getTableExpressions.h>
2020-02-12 17:39:57 +00:00
#include <Parsers/ASTAlterQuery.h>
#include <Parsers/ASTAsterisk.h>
2020-01-14 03:07:31 +00:00
#include <Parsers/ASTCreateQuery.h>
#include <Parsers/ASTDropQuery.h>
#include <Parsers/ASTIdentifier.h>
#include <Parsers/ASTLiteral.h>
#include <Parsers/ASTSelectQuery.h>
2020-03-22 15:03:16 +00:00
#include <Parsers/ASTSetQuery.h>
2020-01-14 03:07:31 +00:00
#include <Parsers/ASTSubquery.h>
#include <Parsers/ASTTablesInSelectQuery.h>
#include <Parsers/ASTWatchQuery.h>
#include <Parsers/formatAST.h>
2020-03-22 15:03:16 +00:00
#include <Parsers/queryToString.h>
2020-02-22 17:06:10 +00:00
#include <Processors/Sources/SourceFromInputStream.h>
#include <Processors/Sources/SourceFromSingleChunk.h>
2021-05-28 07:36:19 +00:00
#include <Processors/Transforms/ExpressionTransform.h>
2020-02-22 17:06:10 +00:00
#include <Processors/Transforms/FilterTransform.h>
2020-01-14 03:07:31 +00:00
#include <Storages/StorageFactory.h>
2020-02-14 08:07:03 +00:00
#include <boost/lexical_cast.hpp>
2020-01-14 03:07:31 +00:00
#include <Common/typeid_cast.h>
#include <Storages/WindowView/ReplaceWindowColumnBlockInputStream.h>
2020-01-14 03:07:31 +00:00
#include <Storages/WindowView/StorageWindowView.h>
2020-02-21 12:35:26 +00:00
#include <Storages/WindowView/WatermarkBlockInputStream.h>
2020-01-14 03:07:31 +00:00
#include <Storages/WindowView/WindowViewBlockInputStream.h>
#include <Storages/WindowView/WindowViewProxyStorage.h>
namespace DB
{
namespace ErrorCodes
{
2020-03-01 18:08:52 +00:00
extern const int ARGUMENT_OUT_OF_BOUND;
2020-07-21 09:43:42 +00:00
extern const int BAD_ARGUMENTS;
2020-07-27 09:32:15 +00:00
extern const int ILLEGAL_COLUMN;
2020-07-21 09:43:42 +00:00
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
2020-01-14 03:07:31 +00:00
extern const int INCORRECT_QUERY;
extern const int LOGICAL_ERROR;
2020-01-14 03:07:31 +00:00
extern const int QUERY_IS_NOT_SUPPORTED_IN_WINDOW_VIEW;
extern const int SUPPORT_IS_DISABLED;
extern const int TABLE_WAS_NOT_DROPPED;
2020-01-14 03:07:31 +00:00
}
namespace
{
2021-05-28 07:36:19 +00:00
struct FetchQueryInfoMatcher
2020-01-14 03:07:31 +00:00
{
2021-05-28 07:36:19 +00:00
using Visitor = InDepthNodeVisitor<FetchQueryInfoMatcher, true>;
2020-03-22 15:03:16 +00:00
using TypeToVisit = ASTFunction;
2020-01-14 03:07:31 +00:00
2020-07-20 13:32:34 +00:00
struct Data
{
ASTPtr window_function;
String window_id_name;
String window_id_alias;
String serialized_window_function;
String timestamp_column_name;
bool is_tumble = false;
bool is_hop = false;
};
static bool needChildVisit(ASTPtr &, const ASTPtr &) { return true; }
2020-01-14 03:07:31 +00:00
2020-07-20 13:32:34 +00:00
static void visit(ASTPtr & ast, Data & data)
2020-01-14 03:07:31 +00:00
{
2020-07-20 13:32:34 +00:00
if (auto * t = ast->as<ASTFunction>())
2020-01-14 03:07:31 +00:00
{
2020-07-20 13:32:34 +00:00
if (t->name == "TUMBLE" || t->name == "HOP")
{
2020-07-20 13:32:34 +00:00
data.is_tumble = t->name == "TUMBLE";
data.is_hop = t->name == "HOP";
if (!data.window_function)
{
t->name = "WINDOW_ID";
data.window_id_name = t->getColumnName();
data.window_id_alias = t->alias;
data.window_function = t->clone();
data.window_function->setAlias("");
data.serialized_window_function = serializeAST(*data.window_function);
data.timestamp_column_name = t->arguments->children[0]->getColumnName();
}
else
{
auto temp_node = t->clone();
temp_node->setAlias("");
if (serializeAST(*temp_node) != data.serialized_window_function)
throw Exception("WINDOW VIEW only support ONE WINDOW FUNCTION", ErrorCodes::QUERY_IS_NOT_SUPPORTED_IN_WINDOW_VIEW);
}
}
2020-01-14 03:07:31 +00:00
}
}
};
2020-02-14 08:55:56 +00:00
2020-07-20 13:32:34 +00:00
struct ReplaceWindowIdMatcher
{
2020-07-20 13:32:34 +00:00
public:
using Visitor = InDepthNodeVisitor<ReplaceWindowIdMatcher, true>;
struct Data
{
String window_name;
};
static bool needChildVisit(ASTPtr &, const ASTPtr &) { return true; }
2020-07-20 13:32:34 +00:00
static void visit(ASTPtr & ast, Data & data)
{
2020-07-20 13:32:34 +00:00
if (auto * t = ast->as<ASTFunction>())
2020-06-17 15:06:19 +00:00
{
2020-07-20 13:32:34 +00:00
if (t->name == "WINDOW_ID")
t->name = data.window_name;
2020-06-17 15:06:19 +00:00
}
}
};
2020-07-19 18:56:55 +00:00
struct ReplaceFunctionNowData
2020-03-22 15:03:16 +00:00
{
using TypeToVisit = ASTFunction;
bool is_time_column_func_now = false;
String window_id_name;
2020-03-22 15:03:16 +00:00
void visit(ASTFunction & node, ASTPtr & node_ptr)
{
if (node.name == "WINDOW_ID")
2020-03-22 15:03:16 +00:00
{
if (const auto * t = node.arguments->children[0]->as<ASTFunction>(); t && t->name == "now")
{
is_time_column_func_now = true;
node_ptr->children[0]->children[0] = std::make_shared<ASTIdentifier>("____timestamp");
window_id_name = node.getColumnName();
2020-03-22 15:03:16 +00:00
}
}
}
};
2020-07-19 18:56:55 +00:00
using ReplaceFunctionNowVisitor = InDepthNodeVisitor<OneTypeMatcher<ReplaceFunctionNowData>, true>;
2020-07-20 13:32:34 +00:00
struct ReplaceFunctionWindowMatcher
{
2020-07-20 13:32:34 +00:00
using Visitor = InDepthNodeVisitor<ReplaceFunctionWindowMatcher, true>;
struct Data{};
2020-07-20 13:32:34 +00:00
static bool needChildVisit(ASTPtr &, const ASTPtr &) { return true; }
static void visit(ASTPtr & ast, Data &)
{
2020-07-20 13:32:34 +00:00
if (auto * t = ast->as<ASTFunction>())
{
2021-05-29 10:29:38 +00:00
if (t->name == "HOP" || t->name == "TUMBLE")
t->name = "WINDOW_ID";
2020-07-20 13:32:34 +00:00
}
2020-03-22 15:03:16 +00:00
}
};
2020-07-19 18:56:55 +00:00
class ToIdentifierMatcher
2020-02-17 05:06:03 +00:00
{
public:
2020-07-19 18:56:55 +00:00
using Visitor = InDepthNodeVisitor<ToIdentifierMatcher, true>;
2020-02-17 05:06:03 +00:00
struct Data
{
String window_id_name;
String window_id_alias;
Aliases * aliases;
2020-02-17 05:06:03 +00:00
};
2020-03-22 15:03:16 +00:00
static bool needChildVisit(ASTPtr &, const ASTPtr &) { return true; }
2020-02-17 05:06:03 +00:00
static void visit(ASTPtr & ast, Data & data)
{
if (const auto * t = ast->as<ASTFunction>())
visit(*t, ast, data);
2020-03-22 15:03:16 +00:00
if (const auto * t = ast->as<ASTIdentifier>())
visit(*t, ast, data);
2020-02-17 05:06:03 +00:00
}
private:
static void visit(const ASTFunction & node, ASTPtr & node_ptr, Data &)
2020-02-17 05:06:03 +00:00
{
if (node.name == "tuple")
return;
else
node_ptr = std::make_shared<ASTIdentifier>(node.getColumnName());
2020-02-17 05:06:03 +00:00
}
2020-03-22 15:03:16 +00:00
static void visit(const ASTIdentifier & node, ASTPtr & node_ptr, Data & data)
{
if (node.getColumnName() == data.window_id_alias)
2021-05-29 10:29:38 +00:00
{
if (auto identifier = std::dynamic_pointer_cast<ASTIdentifier>(node_ptr))
identifier->setShortName(data.window_id_name);
}
2020-03-22 15:03:16 +00:00
}
2020-02-17 05:06:03 +00:00
};
2020-07-21 17:41:03 +00:00
/// Maps an interval kind name ("Second", "Minute", ..., "Year") to the corresponding IntervalKind.
/// Throws BAD_ARGUMENTS for any other string: the caller passes the suffix of an arbitrary
/// `toInterval*` function name, so an unknown value must not reach undefined behaviour.
IntervalKind strToIntervalKind(const String & interval_str)
{
    if (interval_str == "Second")
        return IntervalKind::Second;
    if (interval_str == "Minute")
        return IntervalKind::Minute;
    if (interval_str == "Hour")
        return IntervalKind::Hour;
    if (interval_str == "Day")
        return IntervalKind::Day;
    if (interval_str == "Week")
        return IntervalKind::Week;
    if (interval_str == "Month")
        return IntervalKind::Month;
    if (interval_str == "Quarter")
        return IntervalKind::Quarter;
    if (interval_str == "Year")
        return IntervalKind::Year;

    throw Exception("Unknown interval kind: " + interval_str, ErrorCodes::BAD_ARGUMENTS);
}
2020-07-21 09:43:42 +00:00
2020-07-21 17:41:03 +00:00
void extractWindowArgument(const ASTPtr & ast, IntervalKind::Kind & kind, Int64 & num_units, String err_msg)
2020-07-21 09:43:42 +00:00
{
const auto * arg = ast->as<ASTFunction>();
if (!arg || !startsWith(arg->name, "toInterval"))
throw Exception(err_msg, ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
kind = strToIntervalKind(arg->name.substr(10));
const auto * interval_unit = arg->children.front()->children.front()->as<ASTLiteral>();
if (!interval_unit
|| (interval_unit->value.getType() != Field::Types::String && interval_unit->value.getType() != Field::Types::UInt64))
throw Exception("Interval argument must be integer", ErrorCodes::BAD_ARGUMENTS);
if (interval_unit->value.getType() == Field::Types::String)
num_units = std::stoi(interval_unit->value.safeGet<String>());
else
num_units = interval_unit->value.safeGet<UInt64>();
if (num_units <= 0)
throw Exception("Value for Interval argument must be positive.", ErrorCodes::ARGUMENT_OUT_OF_BOUND);
}
2020-07-26 19:11:27 +00:00
/// Adds `num_units` intervals of the given kind to `time_sec` by dispatching to
/// AddTime<kind>::execute with the supplied time zone. `num_units` may be negative.
UInt32 addTime(UInt32 time_sec, IntervalKind::Kind kind, Int64 num_units, const DateLUTImpl & time_zone)
{
#define DISPATCH_ADD_TIME(KIND) \
    case IntervalKind::KIND: \
        return AddTime<IntervalKind::KIND>::execute(time_sec, num_units, time_zone);

    switch (kind)
    {
        DISPATCH_ADD_TIME(Second)
        DISPATCH_ADD_TIME(Minute)
        DISPATCH_ADD_TIME(Hour)
        DISPATCH_ADD_TIME(Day)
        DISPATCH_ADD_TIME(Week)
        DISPATCH_ADD_TIME(Month)
        DISPATCH_ADD_TIME(Quarter)
        DISPATCH_ADD_TIME(Year)
    }

#undef DISPATCH_ADD_TIME

    /// Every case above returns.
    __builtin_unreachable();
}
2020-01-14 03:07:31 +00:00
}
/// Determines which table the SELECT of a WINDOW VIEW reads from, descending into a subquery if needed.
///
/// select_database_name - in: database to assume when the query names none; out: database of the table.
/// select_table_name    - out: table name; left untouched if the query has neither a table nor a subquery.
///
/// Throws QUERY_IS_NOT_SUPPORTED_IN_WINDOW_VIEW for a subquery with UNION and LOGICAL_ERROR
/// if the table expression is something else.
static void extractDependentTable(ASTSelectQuery & query, String & select_database_name, String & select_table_name)
{
    auto db_and_table = getDatabaseAndTable(query, 0);
    ASTPtr subquery = extractTableExpression(query, 0);

    if (db_and_table)
    {
        select_table_name = db_and_table->table;

        if (!db_and_table->database.empty())
        {
            select_database_name = db_and_table->database;
            return;
        }

        /// The query names no database: run AddDefaultDatabaseVisitor over it with the assumed one.
        db_and_table->database = select_database_name;
        AddDefaultDatabaseVisitor visitor(select_database_name);
        visitor.visit(query);
        return;
    }

    if (!subquery)
        return;

    auto * ast_select = subquery->as<ASTSelectWithUnionQuery>();
    if (!ast_select)
        throw Exception(
            "Logical error while creating StorageWindowView."
            " Could not retrieve table name from select query.",
            DB::ErrorCodes::LOGICAL_ERROR);

    if (ast_select->list_of_selects->children.size() != 1)
        throw Exception("UNION is not supported for WINDOW VIEW", ErrorCodes::QUERY_IS_NOT_SUPPORTED_IN_WINDOW_VIEW);

    /// Exactly one SELECT inside the subquery: resolve its source recursively.
    auto & nested_select = ast_select->list_of_selects->children.at(0);
    extractDependentTable(nested_select->as<ASTSelectQuery &>(), select_database_name, select_table_name);
}
2020-07-13 14:31:54 +00:00
/// Returns the bound used by the cleanup query (rows with a window id below it are deleted),
/// or 0 when nothing has been fired yet. Reads the watermark state under fire_signal_mutex.
UInt32 StorageWindowView::getCleanupBound()
{
    std::lock_guard lock(fire_signal_mutex);

    const UInt32 fired_bound = max_fired_watermark;
    if (fired_bound == 0)
        return 0;

    /// Processing time: the last fired watermark is the bound.
    if (is_proctime)
        return fired_bound;

    if (max_watermark == 0)
        return 0;

    if (!allowed_lateness)
        return fired_bound;

    /// With allowed lateness the bound may not exceed the start of the window that contains
    /// (max_timestamp - lateness interval).
    const UInt32 lateness_time = addTime(max_timestamp, lateness_kind, -1 * lateness_num_units, *time_zone);
    const UInt32 lateness_bound = getWindowLowerBound(lateness_time);
    return lateness_bound < fired_bound ? lateness_bound : fired_bound;
}
2021-05-28 07:36:19 +00:00
/// Builds `ALTER TABLE <inner table> DELETE WHERE <window_id_name> < <cleanup bound>`.
ASTPtr StorageWindowView::getCleanupQuery()
{
    /// Predicate: less(window id column, cleanup bound).
    ASTPtr outdated_rows_predicate
        = makeASTFunction("less", std::make_shared<ASTIdentifier>(window_id_name), std::make_shared<ASTLiteral>(getCleanupBound()));

    auto delete_command = std::make_shared<ASTAlterCommand>();
    delete_command->type = ASTAlterCommand::DELETE;
    delete_command->predicate = outdated_rows_predicate;
    delete_command->children.push_back(delete_command->predicate);

    auto query = std::make_shared<ASTAlterQuery>();
    query->database = inner_table_id.database_name;
    query->table = inner_table_id.table_name;
    query->set(query->command_list, std::make_shared<ASTExpressionList>());
    query->command_list->children.push_back(delete_command);

    return query;
}
2020-01-14 03:07:31 +00:00
void StorageWindowView::checkTableCanBeDropped() const
{
2020-01-24 02:45:45 +00:00
auto table_id = getStorageID();
Dependencies dependencies = DatabaseCatalog::instance().getDependencies(table_id);
2020-01-14 03:07:31 +00:00
if (!dependencies.empty())
{
2020-01-24 02:45:45 +00:00
StorageID dependent_table_id = dependencies.front();
throw Exception("Table has dependency " + dependent_table_id.getNameForLogs(), ErrorCodes::TABLE_WAS_NOT_DROPPED);
2020-01-14 03:07:31 +00:00
}
}
2021-05-28 07:36:19 +00:00
/// Runs a DROP-family query (`kind`) on `target_table_id` as a secondary query, if the table exists.
///
/// global_context  - context the query context is copied from.
/// current_context - context used to look the table up and to obtain a ZooKeeper metadata transaction.
/// no_delay        - forwarded to ASTDropQuery::no_delay.
static void executeDropQuery(ASTDropQuery::Kind kind, ContextPtr global_context, ContextPtr current_context, const StorageID & target_table_id, bool no_delay)
{
    /// Nothing to do when the table cannot be found.
    if (!DatabaseCatalog::instance().tryGetTable(target_table_id, current_context))
        return;

    auto drop_ast = std::make_shared<ASTDropQuery>();
    drop_ast->database = target_table_id.database_name;
    drop_ast->table = target_table_id.table_name;
    drop_ast->kind = kind;
    drop_ast->no_delay = no_delay;
    ASTPtr query_ptr = drop_ast;

    auto query_context = Context::createCopy(global_context);
    query_context->getClientInfo().query_kind = ClientInfo::QueryKind::SECONDARY_QUERY;

    if (auto txn = current_context->getZooKeeperMetadataTransaction())
    {
        /// For Replicated database
        query_context->setQueryContext(current_context);
        query_context->initZooKeeperMetadataTransaction(txn, true);
    }

    InterpreterDropQuery interpreter(query_ptr, query_context);
    interpreter.execute();
}
2020-06-09 08:48:04 +00:00
void StorageWindowView::drop()
2020-01-14 03:07:31 +00:00
{
2020-01-24 02:45:45 +00:00
auto table_id = getStorageID();
DatabaseCatalog::instance().removeDependency(select_table_id, table_id);
2021-05-28 07:36:19 +00:00
executeDropQuery(ASTDropQuery::Kind::Drop, getContext(), getContext(), inner_table_id, true);
2020-02-12 17:39:57 +00:00
2020-01-14 03:07:31 +00:00
std::lock_guard lock(mutex);
is_dropped = true;
fire_condition.notify_all();
2020-01-14 03:07:31 +00:00
}
2021-05-28 07:36:19 +00:00
void StorageWindowView::truncate(const ASTPtr &, const StorageMetadataPtr &, ContextPtr local_context, TableExclusiveLockHolder &)
2020-02-17 08:18:27 +00:00
{
2021-05-28 07:36:19 +00:00
executeDropQuery(ASTDropQuery::Kind::Truncate, getContext(), local_context, inner_table_id, true);
2020-02-17 08:18:27 +00:00
}
2020-07-15 14:38:50 +00:00
bool StorageWindowView::optimize(
const ASTPtr & query,
2021-05-28 07:36:19 +00:00
const StorageMetadataPtr & /*metadata_snapshot*/,
2020-07-15 14:38:50 +00:00
const ASTPtr & partition,
bool final,
bool deduplicate,
2021-05-28 07:36:19 +00:00
const Names & deduplicate_by_columns,
ContextPtr local_context)
2020-02-22 17:06:10 +00:00
{
2021-05-28 07:36:19 +00:00
auto storage_ptr = getInnerStorage();
auto metadata_snapshot = storage_ptr->getInMemoryMetadataPtr();
return getInnerStorage()->optimize(query, metadata_snapshot, partition, final, deduplicate, deduplicate_by_columns, local_context);
2020-02-22 17:06:10 +00:00
}
2020-07-13 14:31:54 +00:00
inline void StorageWindowView::cleanup()
2020-02-12 17:39:57 +00:00
{
2021-05-28 07:36:19 +00:00
InterpreterAlterQuery alt_query(getCleanupQuery(), window_view_context);
alt_query.execute();
2020-02-20 17:30:58 +00:00
2020-03-03 04:42:12 +00:00
std::lock_guard lock(fire_signal_mutex);
2020-03-01 18:08:52 +00:00
watch_streams.remove_if([](std::weak_ptr<WindowViewBlockInputStream> & ptr) { return ptr.expired(); });
2020-02-12 17:39:57 +00:00
}
2020-03-01 18:08:52 +00:00
/// Fires the window identified by `watermark`: reads the new blocks for it and hands every block
/// to all live WATCH streams and, if a target table is configured, writes it to that table.
/// Does nothing when there is neither a target table nor a WATCH stream.
inline void StorageWindowView::fire(UInt32 watermark)
{
    if (target_table_id.empty() && watch_streams.empty())
        return;

    BlockInputStreamPtr new_blocks;
    {
        /// The input stream is created under `mutex`; reading from it happens outside of it.
        std::lock_guard lock(mutex);
        new_blocks = getNewBlocksInputStreamPtr(watermark);
    }

    /// Passes the block to every WATCH stream that is still alive.
    auto notify_watchers = [this](auto & block)
    {
        for (auto & weak_stream : watch_streams)
        {
            if (auto stream = weak_stream.lock())
                stream->addBlock(block);
        }
    };

    if (target_table_id.empty())
    {
        new_blocks->readPrefix();
        while (auto block = new_blocks->read())
            notify_watchers(block);
        new_blocks->readSuffix();
    }
    else
    {
        StoragePtr target_table = getTargetStorage();
        auto metadata_snapshot = target_table->getInMemoryMetadataPtr();
        /// Keep the target table share-locked while writing into it.
        auto lock = target_table->lockForShare(window_view_context->getCurrentQueryId(), window_view_context->getSettingsRef().lock_acquire_timeout);
        auto target_out = target_table->write(getFinalQuery(), metadata_snapshot, window_view_context);

        new_blocks->readPrefix();
        target_out->writePrefix();
        while (auto block = new_blocks->read())
        {
            /// Watchers first: the block is moved into the output stream afterwards.
            notify_watchers(block);
            target_out->write(std::move(block));
        }
        new_blocks->readSuffix();
        target_out->writeSuffix();
    }

    fire_condition.notify_all();
}
2021-05-28 07:36:19 +00:00
std::shared_ptr<ASTCreateQuery> StorageWindowView::getInnerTableCreateQuery(const ASTPtr & inner_query, ASTStorage * storage, const String & database_name, const String & table_name)
2020-02-12 17:39:57 +00:00
{
/// We will create a query to create an internal table.
auto inner_create_query = std::make_shared<ASTCreateQuery>();
inner_create_query->database = database_name;
inner_create_query->table = table_name;
2020-02-12 17:39:57 +00:00
2020-07-19 19:00:21 +00:00
auto inner_select_query = std::static_pointer_cast<ASTSelectQuery>(inner_query);
2020-03-24 02:46:56 +00:00
auto t_sample_block
2020-07-15 14:38:50 +00:00
= InterpreterSelectQuery(
2021-05-28 07:36:19 +00:00
inner_select_query, window_view_context, getParentStorage(), nullptr, SelectQueryOptions(QueryProcessingStage::WithMergeableState))
2020-02-12 17:39:57 +00:00
.getSampleBlock();
auto columns_list = std::make_shared<ASTExpressionList>();
2020-02-17 05:06:03 +00:00
if (is_time_column_func_now)
2020-02-17 05:06:03 +00:00
{
auto column_window = std::make_shared<ASTColumnDeclaration>();
column_window->name = window_id_name;
column_window->type = std::make_shared<ASTIdentifier>("UInt32");
2020-02-17 05:06:03 +00:00
columns_list->children.push_back(column_window);
}
2020-06-17 15:06:19 +00:00
for (const auto & column : t_sample_block.getColumnsWithTypeAndName())
2020-02-12 17:39:57 +00:00
{
ParserIdentifierWithOptionalParameters parser;
2020-03-24 02:46:56 +00:00
String sql = column.type->getName();
2020-06-09 08:48:04 +00:00
ASTPtr ast = parseQuery(parser, sql.data(), sql.data() + sql.size(), "data type", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH);
2020-02-12 17:39:57 +00:00
auto column_dec = std::make_shared<ASTColumnDeclaration>();
2020-03-24 02:46:56 +00:00
column_dec->name = column.name;
2020-02-12 17:39:57 +00:00
column_dec->type = ast;
columns_list->children.push_back(column_dec);
}
2020-07-19 18:56:55 +00:00
ToIdentifierMatcher::Data query_data;
query_data.window_id_name = window_id_name;
query_data.window_id_alias = window_id_alias;
2020-07-19 18:56:55 +00:00
ToIdentifierMatcher::Visitor to_identifier_visitor(query_data);
2020-03-22 15:03:16 +00:00
2020-07-19 18:56:55 +00:00
ReplaceFunctionNowData time_now_data;
ReplaceFunctionNowVisitor time_now_visitor(time_now_data);
2020-07-20 13:32:34 +00:00
ReplaceFunctionWindowMatcher::Data func_hop_data;
ReplaceFunctionWindowMatcher::Visitor func_window_visitor(func_hop_data);
2020-03-22 15:03:16 +00:00
auto new_storage = std::make_shared<ASTStorage>();
if (storage == nullptr)
2020-03-22 15:03:16 +00:00
{
new_storage->set(new_storage->engine, makeASTFunction("AggregatingMergeTree"));
for (auto & child : inner_select_query->groupBy()->children)
if (auto * ast_with_alias = dynamic_cast<ASTWithAlias *>(child.get()))
ast_with_alias->setAlias("");
auto order_by = std::make_shared<ASTFunction>();
order_by->name = "tuple";
order_by->arguments = inner_select_query->groupBy();
order_by->children.push_back(order_by->arguments);
ASTPtr order_by_ptr = order_by;
2020-03-22 15:03:16 +00:00
if (is_time_column_func_now)
time_now_visitor.visit(order_by_ptr);
2020-07-19 18:56:55 +00:00
to_identifier_visitor.visit(order_by_ptr);
for (auto & child : order_by->arguments->children)
{
if (child->getColumnName() == window_id_name)
{
ASTPtr tmp = child;
child = order_by->arguments->children[0];
order_by->arguments->children[0] = tmp;
break;
}
}
new_storage->set(new_storage->order_by, order_by_ptr);
new_storage->set(new_storage->primary_key, std::make_shared<ASTIdentifier>(window_id_name));
2020-03-22 15:03:16 +00:00
}
else
2020-03-22 15:03:16 +00:00
{
if (storage->ttl_table)
throw Exception("TTL is not supported for inner table in Window View", ErrorCodes::QUERY_IS_NOT_SUPPORTED_IN_WINDOW_VIEW);
if (!endsWith(storage->engine->name, "MergeTree"))
throw Exception(
"The ENGINE of WindowView must be MergeTree family of table engines including the engines with replication support",
ErrorCodes::INCORRECT_QUERY);
new_storage->set(new_storage->engine, storage->engine->clone());
2020-07-19 19:02:18 +00:00
2020-07-21 10:10:35 +00:00
auto visit = [&](const IAST * ast, IAST * & field)
{
2020-07-19 19:02:18 +00:00
if (ast)
{
auto node = ast->clone();
if (is_time_column_func_now)
time_now_visitor.visit(node);
func_window_visitor.visit(node);
to_identifier_visitor.visit(node);
new_storage->set(field, node);
}
};
visit(storage->partition_by, new_storage->partition_by);
visit(storage->primary_key, new_storage->primary_key);
visit(storage->order_by, new_storage->order_by);
visit(storage->sample_by, new_storage->sample_by);
if (storage->settings)
new_storage->set(new_storage->settings, storage->settings->clone());
2020-03-22 15:03:16 +00:00
}
auto new_columns = std::make_shared<ASTColumns>();
new_columns->set(new_columns->columns, columns_list);
inner_create_query->set(inner_create_query->columns_list, new_columns);
inner_create_query->set(inner_create_query->storage, new_storage);
2020-02-12 17:39:57 +00:00
return inner_create_query;
2020-02-12 17:39:57 +00:00
}
2020-03-03 04:42:12 +00:00
/// Returns the lower bound of the window that `time_sec` falls into.
/// TUMBLE: time_sec rounded down to a multiple of the window size.
/// HOP:    (time_sec rounded down to a multiple of the hop size) + hop size - window size.
inline UInt32 StorageWindowView::getWindowLowerBound(UInt32 time_sec)
{
    /// The interval kind to dispatch on: the window kind for TUMBLE, the hop kind for HOP.
    IntervalKind dispatch_kind;
    dispatch_kind = is_tumble ? window_kind : hop_kind;

#define LOWER_BOUND_CASE(KIND) \
    case IntervalKind::KIND: \
    { \
        if (is_tumble) \
            return ToStartOfTransform<IntervalKind::KIND>::execute(time_sec, window_num_units, *time_zone); \
        UInt32 hop_start = ToStartOfTransform<IntervalKind::KIND>::execute(time_sec, hop_num_units, *time_zone); \
        UInt32 hop_end = AddTime<IntervalKind::KIND>::execute(hop_start, hop_num_units, *time_zone); \
        return AddTime<IntervalKind::KIND>::execute(hop_end, -1 * window_num_units, *time_zone); \
    }

    switch (dispatch_kind)
    {
        LOWER_BOUND_CASE(Second)
        LOWER_BOUND_CASE(Minute)
        LOWER_BOUND_CASE(Hour)
        LOWER_BOUND_CASE(Day)
        LOWER_BOUND_CASE(Week)
        LOWER_BOUND_CASE(Month)
        LOWER_BOUND_CASE(Quarter)
        LOWER_BOUND_CASE(Year)
    }

#undef LOWER_BOUND_CASE

    /// Every case above returns.
    __builtin_unreachable();
}
/// Returns the upper bound of the window that `time_sec` falls into.
/// TUMBLE: (time_sec rounded down to a multiple of the window size) + window size.
/// HOP:    (time_sec rounded down to a multiple of the hop size) + hop size.
inline UInt32 StorageWindowView::getWindowUpperBound(UInt32 time_sec)
{
    /// The interval kind to dispatch on: the window kind for TUMBLE, the hop kind for HOP.
    IntervalKind dispatch_kind;
    dispatch_kind = is_tumble ? window_kind : hop_kind;

#define UPPER_BOUND_CASE(KIND) \
    case IntervalKind::KIND: \
    { \
        if (is_tumble) \
        { \
            UInt32 window_start = ToStartOfTransform<IntervalKind::KIND>::execute(time_sec, window_num_units, *time_zone); \
            return AddTime<IntervalKind::KIND>::execute(window_start, window_num_units, *time_zone); \
        } \
        UInt32 hop_start = ToStartOfTransform<IntervalKind::KIND>::execute(time_sec, hop_num_units, *time_zone); \
        return AddTime<IntervalKind::KIND>::execute(hop_start, hop_num_units, *time_zone); \
    }

    switch (dispatch_kind)
    {
        UPPER_BOUND_CASE(Second)
        UPPER_BOUND_CASE(Minute)
        UPPER_BOUND_CASE(Hour)
        UPPER_BOUND_CASE(Day)
        UPPER_BOUND_CASE(Week)
        UPPER_BOUND_CASE(Month)
        UPPER_BOUND_CASE(Quarter)
        UPPER_BOUND_CASE(Year)
    }

#undef UPPER_BOUND_CASE

    /// Every case above returns.
    __builtin_unreachable();
}
2020-03-03 04:42:12 +00:00
inline void StorageWindowView::addFireSignal(std::set<UInt32> & signals)
{
2020-03-03 04:42:12 +00:00
std::lock_guard lock(fire_signal_mutex);
2020-06-17 15:06:19 +00:00
for (const auto & signal : signals)
2020-03-23 03:21:01 +00:00
fire_signal.push_back(signal);
2020-03-01 18:08:52 +00:00
fire_signal_condition.notify_all();
}
2020-03-01 18:08:52 +00:00
/// Raises max_timestamp to `timestamp` if the latter is larger; never lowers it.
inline void StorageWindowView::updateMaxTimestamp(UInt32 timestamp)
{
    std::lock_guard lock(fire_signal_mutex);
    if (max_timestamp < timestamp)
        max_timestamp = timestamp;
}
2020-03-01 18:08:52 +00:00
/// Advances max_watermark in response to a new `watermark` and queues a fire signal for every
/// window boundary that has been passed. All state is modified under fire_signal_mutex.
inline void StorageWindowView::updateMaxWatermark(UInt32 watermark)
{
    std::lock_guard lock(fire_signal_mutex);

    /// First call: only initialise max_watermark, nothing is fired.
    if (max_watermark == 0)
    {
        max_watermark = getWindowUpperBound(watermark - 1);
        return;
    }

    /// One step is the window size for TUMBLE and the hop size for HOP.
    const auto step_kind = is_tumble ? window_kind : hop_kind;
    const auto step_units = is_tumble ? window_num_units : hop_num_units;

    /// Queues the current max_watermark for firing and moves it one step forward.
    auto fire_and_advance = [&]
    {
        fire_signal.push_back(max_watermark);
        max_fired_watermark = max_watermark;
        max_watermark = addTime(max_watermark, step_kind, step_units, *time_zone);
    };

    bool updated = false;
    if (is_watermark_strictly_ascending)
    {
        updated = max_watermark < watermark;
        while (max_watermark < watermark)
            fire_and_advance();
    }
    else
    {
        /// Any watermark mode other than strictly ascending: the boundary is shifted by the watermark interval.
        UInt32 max_watermark_bias = addTime(max_watermark, watermark_kind, watermark_num_units, *time_zone);
        updated = max_watermark_bias <= watermark;
        while (max_watermark_bias <= max_timestamp)
        {
            fire_and_advance();
            /// NOTE(review): inside the loop the bias is recomputed with the window/hop step rather than
            /// with the watermark interval used before the loop - kept as is, confirm this is intended.
            max_watermark_bias = addTime(max_watermark, step_kind, step_units, *time_zone);
        }
    }

    if (updated)
        fire_signal_condition.notify_all();
}
2020-07-13 14:31:54 +00:00
void StorageWindowView::threadFuncCleanup()
2020-02-12 17:39:57 +00:00
{
while (!shutdown_called)
{
try
{
2020-02-17 08:18:27 +00:00
sleep(clean_interval);
2020-07-13 14:31:54 +00:00
cleanup();
2020-02-12 17:39:57 +00:00
}
catch (...)
{
tryLogCurrentException(__PRETTY_FUNCTION__);
break;
}
}
}
2020-03-01 18:08:52 +00:00
void StorageWindowView::threadFuncFireProc()
2020-01-14 03:07:31 +00:00
{
std::unique_lock lock(fire_signal_mutex);
UInt32 timestamp_now = std::time(nullptr);
while (next_fire_signal <= timestamp_now)
2020-01-14 03:07:31 +00:00
{
try
2020-01-14 03:07:31 +00:00
{
fire(next_fire_signal);
2020-01-14 03:07:31 +00:00
}
catch (...)
{
tryLogCurrentException(__PRETTY_FUNCTION__);
}
max_fired_watermark = next_fire_signal;
next_fire_signal = addTime(next_fire_signal, window_kind, window_num_units, *time_zone);
2020-03-01 18:08:52 +00:00
}
2020-08-02 18:07:01 +00:00
UInt64 timestamp_ms = static_cast<UInt64>(Poco::Timestamp().epochMicroseconds()) / 1000;
2020-03-01 18:08:52 +00:00
if (!shutdown_called)
fire_task->scheduleAfter(std::max(UInt64(0), static_cast<UInt64>(next_fire_signal) * 1000 - timestamp_ms));
2020-03-01 18:08:52 +00:00
}
void StorageWindowView::threadFuncFireEvent()
{
std::unique_lock lock(fire_signal_mutex);
while (!shutdown_called)
{
bool signaled = std::cv_status::no_timeout == fire_signal_condition.wait_for(lock, std::chrono::seconds(5));
if (!signaled)
continue;
2020-03-01 18:08:52 +00:00
while (!fire_signal.empty())
{
2020-03-01 18:08:52 +00:00
fire(fire_signal.front());
fire_signal.pop_front();
}
2020-02-14 08:07:03 +00:00
}
}
2020-01-14 03:07:31 +00:00
/// Creates the input stream serving a WATCH query and registers it in watch_streams.
/// An optional LIMIT of the query is passed to the stream; processed_stage is set to Complete.
BlockInputStreams StorageWindowView::watch(
    const Names & /*column_names*/,
    const SelectQueryInfo & query_info,
    ContextPtr local_context,
    QueryProcessingStage::Enum & processed_stage,
    size_t /*max_block_size*/,
    const unsigned /*num_streams*/)
{
    ASTWatchQuery & watch_query = typeid_cast<ASTWatchQuery &>(*query_info.query);

    const bool has_limit = static_cast<bool>(watch_query.limit_length);
    UInt64 limit = 0;
    if (has_limit)
        limit = safeGet<UInt64>(typeid_cast<ASTLiteral &>(*watch_query.limit_length).value);

    auto stream = std::make_shared<WindowViewBlockInputStream>(
        std::static_pointer_cast<StorageWindowView>(shared_from_this()),
        has_limit,
        limit,
        local_context->getSettingsRef().window_view_heartbeat_interval.totalSeconds());

    /// watch_streams is modified under fire_signal_mutex.
    std::lock_guard lock(fire_signal_mutex);
    watch_streams.push_back(stream);

    processed_stage = QueryProcessingStage::Complete;
    return {stream};
}
/// Builds a window view from its CREATE query.
///
/// The constructor:
///  - creates a private query context copied from the global context;
///  - validates that exactly one SELECT (no UNION) is given;
///  - registers a dependency on the table the SELECT reads from (`system.one` when none is named);
///  - derives the inner, mergeable and final queries;
///  - parses the watermark / lateness settings;
///  - on CREATE (not ATTACH) creates the `.inner.<name>` table;
///  - creates the cleanup and fire background tasks in the deactivated state.
///
/// `attach_` is true when the inner table is not to be created again; in that case only its id is recorded.
StorageWindowView::StorageWindowView(
    const StorageID & table_id_,
    ContextPtr context_,
    const ASTCreateQuery & query,
    const ColumnsDescription & columns_,
    bool attach_)
    : IStorage(table_id_)
    , WithContext(context_->getGlobalContext())
{
    window_view_context = Context::createCopy(getContext());
    window_view_context->makeQueryContext();

    StorageInMemoryMetadata storage_metadata;
    storage_metadata.setColumns(columns_);
    setInMemoryMetadata(storage_metadata);

    if (!query.select)
        throw Exception("SELECT query is not specified for " + getName(), ErrorCodes::INCORRECT_QUERY);

    const auto & selects = query.select->list_of_selects->children;
    if (selects.size() != 1)
        throw Exception("UNION is not supported for Window View", ErrorCodes::QUERY_IS_NOT_SUPPORTED_IN_WINDOW_VIEW);

    auto & select_query = typeid_cast<ASTSelectQuery &>(*selects.at(0));

    /// Resolve the table the view depends on; the current database is the default.
    String select_database_name = getContext()->getCurrentDatabase();
    String select_table_name;
    extractDependentTable(select_query, select_database_name, select_table_name);

    /// If the table is not specified - use the table `system.one`
    if (select_table_name.empty())
    {
        select_database_name = "system";
        select_table_name = "one";
    }

    select_table_id = StorageID(select_database_name, select_table_name);
    DatabaseCatalog::instance().addDependency(select_table_id, table_id_);

    /// Inner query: also fills window-related members (see innerQueryParser).
    auto inner_query = innerQueryParser(select_query);

    /// Mergeable query: the inner query after the now()-replacement visitor has run.
    mergeable_query = inner_query->clone();
    ReplaceFunctionNowData func_now_data;
    ReplaceFunctionNowVisitor(func_now_data).visit(mergeable_query);
    is_time_column_func_now = func_now_data.is_time_column_func_now;
    if (is_time_column_func_now)
        window_id_name = func_now_data.window_id_name;

    /// Final query: the mergeable query after the window-id replacement visitor has run.
    final_query = mergeable_query->clone();
    ReplaceWindowIdMatcher::Data final_query_data;
    final_query_data.window_name = is_tumble ? "TUMBLE" : "HOP";
    ReplaceWindowIdMatcher::Visitor(final_query_data).visit(final_query);

    is_watermark_strictly_ascending = query.is_watermark_strictly_ascending;
    is_watermark_ascending = query.is_watermark_ascending;
    is_watermark_bounded = query.is_watermark_bounded;
    target_table_id = query.to_table_id;

    eventTimeParser(query);

    /// The window column carries the same name as the window id, with WINDOW_ID swapped for the function name.
    window_column_name = std::regex_replace(window_id_name, std::regex("WINDOW_ID"), is_tumble ? "TUMBLE" : "HOP");

    const String inner_table_name = ".inner." + table_id_.table_name;
    if (attach_)
    {
        /// Only remember the id; the storage itself is looked up lazily by getInnerStorage().
        inner_table_id = StorageID(table_id_.database_name, inner_table_name);
    }
    else
    {
        auto inner_create_query = getInnerTableCreateQuery(inner_query, query.storage, table_id_.database_name, inner_table_name);

        InterpreterCreateQuery create_interpreter(inner_create_query, window_view_context);
        create_interpreter.setInternal(true);
        create_interpreter.execute();

        inner_storage = DatabaseCatalog::instance().getTable(
            StorageID(inner_create_query->database, inner_create_query->table), getContext());
        inner_table_id = inner_storage->getStorageID();
    }

    clean_interval = getContext()->getSettingsRef().window_view_clean_interval.totalSeconds();
    next_fire_signal = getWindowUpperBound(std::time(nullptr));

    clean_cache_task = window_view_context->getSchedulePool().createTask(
        getStorageID().getFullTableName(), [this] { threadFuncCleanup(); });

    /// Processing-time and event-time views fire through different thread functions.
    if (is_proctime)
        fire_task = window_view_context->getSchedulePool().createTask(
            getStorageID().getFullTableName(), [this] { threadFuncFireProc(); });
    else
        fire_task = window_view_context->getSchedulePool().createTask(
            getStorageID().getFullTableName(), [this] { threadFuncFireEvent(); });

    /// Tasks stay inactive until startup() is called.
    clean_cache_task->deactivate();
    fire_task->deactivate();
}
/// Clones the SELECT query and extracts the window description from it.
///
/// Side effects: sets window_id_name, window_id_alias, timestamp_column_name, is_tumble,
/// window_kind / window_num_units, (for HOP) hop_kind / hop_num_units / slice_num_units, and time_zone.
///
/// Throws INCORRECT_QUERY when the query has no GROUP BY or no TUMBLE/HOP function,
/// and ILLEGAL_COLUMN when the time zone argument is not a constant string.
///
/// Returns the cloned query.
ASTPtr StorageWindowView::innerQueryParser(ASTSelectQuery & query)
{
    if (!query.groupBy())
        throw Exception("GROUP BY query is required for " + getName(), ErrorCodes::INCORRECT_QUERY);

    // Parse stage mergeable
    ASTPtr result = query.clone();
    ASTPtr visited = result;
    FetchQueryInfoMatcher::Data info;
    FetchQueryInfoMatcher::Visitor(info).visit(visited);

    const bool has_window_function = info.is_tumble || info.is_hop;
    if (!has_window_function)
        throw Exception("WINDOW FUNCTION is not specified for " + getName(), ErrorCodes::INCORRECT_QUERY);

    window_id_name = info.window_id_name;
    window_id_alias = info.window_id_alias;
    timestamp_column_name = info.timestamp_column_name;
    is_tumble = info.is_tumble;

    // Parse window function
    auto & window_function = typeid_cast<ASTFunction &>(*info.window_function);
    const auto & arguments = window_function.arguments->children;

    extractWindowArgument(
        arguments.at(1),
        window_kind,
        window_num_units,
        "Illegal type of second argument of function " + window_function.name + " should be Interval");

    if (!is_tumble)
    {
        /// For HOP the second argument is stored as the hop interval and the third as the window interval.
        hop_kind = window_kind;
        hop_num_units = window_num_units;
        extractWindowArgument(
            arguments.at(2),
            window_kind,
            window_num_units,
            "Illegal type of third argument of function " + window_function.name + " should be Interval");
        slice_num_units = std::gcd(hop_num_units, window_num_units);
    }

    // Parse time zone
    const size_t time_zone_arg_num = is_tumble ? 2 : 3;
    if (arguments.size() <= time_zone_arg_num)
    {
        /// No explicit time zone argument: use the default DateLUT instance.
        time_zone = &DateLUT::instance();
        return result;
    }

    const auto * time_zone_ast = arguments.at(time_zone_arg_num)->as<ASTLiteral>();
    const bool is_string_literal = time_zone_ast && time_zone_ast->value.getType() == Field::Types::String;
    if (!is_string_literal)
        throw Exception(
            "Illegal column #" + std::to_string(time_zone_arg_num) + " of time zone argument of function, must be constant string",
            ErrorCodes::ILLEGAL_COLUMN);

    time_zone = &DateLUT::instance(time_zone_ast->value.safeGet<String>());
    return result;
}
2020-07-20 13:32:34 +00:00
/// Reads the WATERMARK and ALLOWED_LATENESS parts of the CREATE query.
///
/// Any watermark mode switches the view off processing time (is_proctime = false) and is
/// rejected with INCORRECT_QUERY when the time column is now().
/// An ascending watermark is stored as a bounded watermark of one second.
void StorageWindowView::eventTimeParser(const ASTCreateQuery & query)
{
    const bool has_watermark
        = query.is_watermark_strictly_ascending || query.is_watermark_ascending || query.is_watermark_bounded;

    if (has_watermark)
    {
        is_proctime = false;

        if (is_time_column_func_now)
            throw Exception("now() is not support for Event time processing.", ErrorCodes::INCORRECT_QUERY);

        if (query.is_watermark_ascending)
        {
            is_watermark_bounded = true;
            watermark_kind = IntervalKind::Second;
            watermark_num_units = 1;
        }
        else if (query.is_watermark_bounded)
        {
            extractWindowArgument(
                query.watermark_function,
                watermark_kind,
                watermark_num_units,
                "Illegal type WATERMARK function should be Interval");
        }
    }

    if (!query.allowed_lateness)
        return;

    allowed_lateness = true;
    extractWindowArgument(
        query.lateness_function,
        lateness_kind,
        lateness_num_units,
        "Illegal type ALLOWED_LATENESS function should be Interval");
}
2021-05-28 07:36:19 +00:00
/// Pushes one inserted block through the view's mergeable query and writes the result into the inner table.
///
/// Steps:
///  1. Snapshot the watermark/timestamp state under fire_signal_mutex.
///  2. If a lateness bound can be computed, drop rows whose timestamp is below it.
///  3. For processing-time views: hold fire_signal_mutex in shared mode for the rest of the call and,
///     when the time column is now(), add a constant `____timestamp` column holding the current time.
///  4. Run the mergeable query up to WithMergeableState and squash the output.
///  5. For event-time views: wrap the stream in WatermarkBlockInputStream and pass it the block's
///     maximum timestamp and/or the last fired watermark.
///  6. Write the stream into the inner storage.
void StorageWindowView::writeIntoWindowView(
    StorageWindowView & window_view,
    const Block & block,
    ContextPtr local_context)
{
    Pipe pipe(std::make_shared<SourceFromSingleChunk>(block.cloneEmpty(), Chunk(block.getColumns(), block.rows())));

    UInt32 lateness_bound = 0;
    UInt32 t_max_watermark = 0;
    UInt32 t_max_timestamp = 0;
    UInt32 t_max_fired_watermark = 0;
    {
        /// Read the shared state in one critical section so the three values are consistent with each other.
        std::lock_guard lock(window_view.fire_signal_mutex);
        t_max_fired_watermark = window_view.max_fired_watermark;
        t_max_watermark = window_view.max_watermark;
        t_max_timestamp = window_view.max_timestamp;
    }

    // Filter outdated data
    if (window_view.allowed_lateness && t_max_timestamp != 0)
    {
        lateness_bound
            = addTime(t_max_timestamp, window_view.lateness_kind, -1 * window_view.lateness_num_units, *window_view.time_zone);

        if (window_view.is_watermark_bounded)
        {
            /// With a bounded watermark, also keep everything newer than one window (TUMBLE) or one hop (HOP)
            /// before the current watermark: the smaller of the two bounds wins.
            UInt32 watermark_lower_bound = window_view.is_tumble
                ? addTime(t_max_watermark, window_view.window_kind, -1 * window_view.window_num_units, *window_view.time_zone)
                : addTime(t_max_watermark, window_view.hop_kind, -1 * window_view.hop_num_units, *window_view.time_zone);

            if (watermark_lower_bound < lateness_bound)
                lateness_bound = watermark_lower_bound;
        }
    }
    else if (!window_view.is_time_column_func_now)
    {
        lateness_bound = t_max_fired_watermark;
    }

    if (lateness_bound > 0)
    {
        /// Build `timestamp_column >= lateness_bound` as an AST and install it as a filter on the pipe.
        ASTPtr args = std::make_shared<ASTExpressionList>();
        args->children.push_back(std::make_shared<ASTIdentifier>(window_view.timestamp_column_name));
        args->children.push_back(std::make_shared<ASTLiteral>(lateness_bound));

        auto filter_function = std::make_shared<ASTFunction>();
        filter_function->name = "greaterOrEquals";
        filter_function->arguments = args;
        filter_function->children.push_back(filter_function->arguments);

        ASTPtr query = filter_function;
        NamesAndTypesList columns;
        columns.emplace_back(window_view.timestamp_column_name, std::make_shared<DataTypeDateTime>());

        auto syntax_result = TreeRewriter(local_context).analyze(query, columns);
        auto filter_expression = ExpressionAnalyzer(filter_function, syntax_result, local_context).getActionsDAG(false);

        pipe.addSimpleTransform([&](const Block & header)
        {
            return std::make_shared<FilterTransform>(
                header, std::make_shared<ExpressionActions>(filter_expression), filter_function->getColumnName(), true);
        });
    }

    /// Held (in shared mode) until the end of the function for processing-time views only.
    std::shared_lock<std::shared_mutex> fire_signal_lock;
    if (window_view.is_proctime)
    {
        fire_signal_lock = std::shared_lock<std::shared_mutex>(window_view.fire_signal_mutex);

        if (window_view.is_time_column_func_now)
        {
            /// Add a constant column carrying the current time.
            ColumnWithTypeAndName column;
            column.name = "____timestamp";
            column.type = std::make_shared<DataTypeDateTime>();
            column.column = column.type->createColumnConst(0, Field(std::time(nullptr)));

            auto adding_column_dag = ActionsDAG::makeAddingColumnActions(std::move(column));
            auto adding_column_actions = std::make_shared<ExpressionActions>(
                std::move(adding_column_dag),
                ExpressionActionsSettings::fromContext(local_context));

            pipe.addSimpleTransform([&](const Block & stream_header)
            {
                return std::make_shared<ExpressionTransform>(stream_header, adding_column_actions);
            });
        }
    }

    /// Common to both modes: interpret the mergeable query over the pipe and squash small blocks.
    /// The interpreter is scoped so that it is destroyed before the stream is post-processed and consumed.
    BlockInputStreamPtr source_stream;
    {
        InterpreterSelectQuery select_block(
            window_view.getMergeableQuery(), local_context, {std::move(pipe)}, QueryProcessingStage::WithMergeableState);

        source_stream = select_block.execute().getInputStream();
        source_stream = std::make_shared<SquashingBlockInputStream>(
            source_stream,
            local_context->getSettingsRef().min_insert_block_size_rows,
            local_context->getSettingsRef().min_insert_block_size_bytes);
    }

    if (!window_view.is_proctime)
    {
        auto watermark_stream
            = std::make_shared<WatermarkBlockInputStream>(source_stream, window_view, window_view.window_id_name);

        if (window_view.is_watermark_bounded || window_view.allowed_lateness)
        {
            /// Maximum of the timestamp column of the *unfiltered* input block (0 for an empty block).
            UInt32 block_max_timestamp = 0;
            const auto & timestamp_data
                = typeid_cast<const ColumnUInt32 &>(*block.getByName(window_view.timestamp_column_name).column).getData();
            for (const auto & timestamp : timestamp_data)
            {
                if (timestamp > block_max_timestamp)
                    block_max_timestamp = timestamp;
            }
            watermark_stream->setMaxTimestamp(block_max_timestamp);
        }

        if (window_view.allowed_lateness && t_max_fired_watermark != 0)
            watermark_stream->setAllowedLateness(t_max_fired_watermark);

        source_stream = watermark_stream;
    }

    auto & inner_storage = window_view.getInnerStorage();
    auto metadata_snapshot = inner_storage->getInMemoryMetadataPtr();
    auto lock = inner_storage->lockForShare(local_context->getCurrentQueryId(), local_context->getSettingsRef().lock_acquire_timeout);
    auto stream = inner_storage->write(window_view.getMergeableQuery(), metadata_snapshot, local_context);
    copyData(*source_stream, *stream);
}
void StorageWindowView::startup()
{
// Start the working thread
2020-03-24 02:46:56 +00:00
clean_cache_task->activateAndSchedule();
fire_task->activateAndSchedule();
2020-01-14 03:07:31 +00:00
}
void StorageWindowView::shutdown()
{
bool expected = false;
if (!shutdown_called.compare_exchange_strong(expected, true))
return;
2020-03-24 02:46:56 +00:00
clean_cache_task->deactivate();
fire_task->deactivate();
2020-01-14 03:07:31 +00:00
}
/// Ensures the background tasks are deactivated before the object goes away.
StorageWindowView::~StorageWindowView()
{
    /// shutdown() guards itself against repeated calls, so this is fine even after an explicit shutdown.
    shutdown();
}
2020-03-03 04:42:12 +00:00
/// Returns the sample block of the final query, computing and caching it on first use.
/// Access to the cache is serialized by sample_block_lock. Constant columns are converted to full columns.
Block & StorageWindowView::getHeader() const
{
    std::lock_guard lock(sample_block_lock);

    if (sample_block)
        return sample_block;

    sample_block = InterpreterSelectQuery(
                       getFinalQuery(),
                       window_view_context,
                       getParentStorage(),
                       nullptr,
                       SelectQueryOptions(QueryProcessingStage::Complete))
                       .getSampleBlock();

    const size_t num_columns = sample_block.columns();
    for (size_t pos = 0; pos < num_columns; ++pos)
    {
        auto & entry = sample_block.safeGetByPosition(pos);
        entry.column = entry.column->convertToFullColumnIfConst();
    }

    return sample_block;
}
/// Returns the storage the view selects from, looking it up in the catalog on first use and caching it.
StoragePtr StorageWindowView::getParentStorage() const
{
    if (!parent_storage)
        parent_storage = DatabaseCatalog::instance().getTable(select_table_id, getContext());

    return parent_storage;
}
/// Returns a reference to the cached inner-table storage, resolving it from inner_table_id on first use.
StoragePtr & StorageWindowView::getInnerStorage() const
{
    if (!inner_storage)
        inner_storage = DatabaseCatalog::instance().getTable(inner_table_id, getContext());

    return inner_storage;
}
/// Builds `SELECT * FROM <inner table> PREWHERE <window condition>`.
///
/// For TUMBLE the condition is `window_id = w_end`.
/// For HOP it is `has([w_end, w_end - slice, ...], window_id)`, listing every slice end that is
/// strictly greater than `w_start`.
ASTPtr StorageWindowView::getFetchColumnQuery(UInt32 w_start, UInt32 w_end) const
{
    auto res_query = std::make_shared<ASTSelectQuery>();

    /// SELECT *
    auto select_list = std::make_shared<ASTExpressionList>();
    select_list->children.push_back(std::make_shared<ASTAsterisk>());
    res_query->setExpression(ASTSelectQuery::Expression::SELECT, select_list);

    /// FROM <inner table>
    auto table_expr = std::make_shared<ASTTableExpression>();
    table_expr->database_and_table_name = createTableIdentifier(inner_table_id.database_name, inner_table_id.table_name);
    table_expr->children.push_back(table_expr->database_and_table_name);

    auto tables_elem = std::make_shared<ASTTablesInSelectQueryElement>();
    tables_elem->table_expression = table_expr;
    tables_elem->children.push_back(table_expr);

    res_query->setExpression(ASTSelectQuery::Expression::TABLES, std::make_shared<ASTTablesInSelectQuery>());
    res_query->tables()->children.push_back(tables_elem);

    /// PREWHERE <window condition>
    if (is_tumble)
    {
        auto func_equals = makeASTFunction("equals", std::make_shared<ASTIdentifier>(window_id_name), std::make_shared<ASTLiteral>(w_end));
        res_query->setExpression(ASTSelectQuery::Expression::PREWHERE, func_equals);
    }
    else
    {
        auto func_array = makeASTFunction("array");
        /// Walk backwards from w_end one slice at a time until w_start is reached.
        for (; w_end > w_start; w_end = addTime(w_end, window_kind, -1 * slice_num_units, *time_zone))
            func_array->arguments->children.push_back(std::make_shared<ASTLiteral>(w_end));

        auto func_has = makeASTFunction("has", func_array, std::make_shared<ASTIdentifier>(window_id_name));
        res_query->setExpression(ASTSelectQuery::Expression::PREWHERE, func_has);
    }

    return res_query;
}
2020-03-03 04:42:12 +00:00
/// Returns a reference to the cached target-table storage.
/// The lookup happens on first use and only when a target table id is set; otherwise the cached pointer stays null.
StoragePtr & StorageWindowView::getTargetStorage() const
{
    const bool needs_lookup = !target_storage && !target_table_id.empty();
    if (needs_lookup)
        target_storage = DatabaseCatalog::instance().getTable(target_table_id, getContext());

    return target_storage;
}
2020-03-01 18:08:52 +00:00
/// Produces the stream of result blocks for the window that ends at `watermark`.
///
/// The rows of that window are fetched from the inner table, the window column is rewritten,
/// and the final query is run over them through a proxy storage. The output is materialized and squashed.
BlockInputStreamPtr StorageWindowView::getNewBlocksInputStreamPtr(UInt32 watermark)
{
    /// The window starts one window interval before the watermark.
    const UInt32 w_start = addTime(watermark, window_kind, -1 * window_num_units, *time_zone);

    /// Stage 1: read the stored intermediate state for this window from the inner table.
    InterpreterSelectQuery fetch(
        getFetchColumnQuery(w_start, watermark),
        window_view_context,
        getInnerStorage(),
        nullptr,
        SelectQueryOptions(QueryProcessingStage::FetchColumns));

    BlockInputStreamPtr fetched = fetch.execute().getInputStream();
    fetched = std::make_shared<ReplaceWindowColumnBlockInputStream>(fetched, window_column_name, w_start, watermark);
    Pipe pipe(std::make_shared<SourceFromInputStream>(std::move(fetched)));

    /// Stage 2: expose that pipe as a storage with the parent table's columns and run the final query on it.
    auto parent_table_metadata = getParentStorage()->getInMemoryMetadataPtr();
    auto proxy_storage = std::make_shared<WindowViewProxyStorage>(
        StorageID(getStorageID().database_name, "WindowViewProxyStorage"),
        parent_table_metadata->getColumns(),
        std::move(pipe),
        QueryProcessingStage::WithMergeableState);

    InterpreterSelectQuery select(
        getFinalQuery(), window_view_context, proxy_storage, nullptr, SelectQueryOptions(QueryProcessingStage::Complete));

    BlockInputStreamPtr result = std::make_shared<MaterializingBlockInputStream>(select.execute().getInputStream());

    /// Keep the context alive while its settings are referenced.
    const auto context = getContext();
    const auto & settings = context->getSettingsRef();
    result = std::make_shared<SquashingBlockInputStream>(
        result, settings.min_insert_block_size_rows, settings.min_insert_block_size_bytes);

    return result;
}
/// Registers the "WindowView" storage engine in the factory.
/// Creating (but not attaching) a window view requires the `allow_experimental_window_view` setting.
void registerStorageWindowView(StorageFactory & factory)
{
    auto creator = [](const StorageFactory::Arguments & args)
    {
        /// ATTACH is always allowed; CREATE only when the experimental setting is on.
        if (!(args.attach || args.getLocalContext()->getSettingsRef().allow_experimental_window_view))
            throw Exception(
                "Experimental WINDOW VIEW feature is not enabled (the setting 'allow_experimental_window_view')",
                ErrorCodes::SUPPORT_IS_DISABLED);

        return StorageWindowView::create(args.table_id, args.getLocalContext(), args.query, args.columns, args.attach);
    };

    factory.registerStorage("WindowView", creator);
}
2020-02-12 17:39:57 +00:00
2020-01-14 16:24:26 +00:00
}