mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-09-21 01:00:48 +00:00
Prevent from using non-deterministic functions in mutations of replicated tables (#7247)
* Added isDeterministic() to IFunctionBuilder i-face * A test for non-deterministic mutations * In MutationsInterpreter::validate() ensure deterministic functions * Adjusted nondeterministic mutation tests * Moved around some code in MutationsInterpreter::validate() * Dropped unnecessary check in findFirstNonDeterministicFuncName() * Corrections to IFunction{Base,Builder} interface
This commit is contained in:
parent
0769f80f4e
commit
03c7b7ff8d
@ -1540,6 +1540,9 @@ public:
|
||||
|
||||
String getName() const override { return name; }
|
||||
|
||||
bool isDeterministic() const override { return true; }
|
||||
bool isDeterministicInScopeOfQuery() const override { return true; }
|
||||
|
||||
bool hasInformationAboutMonotonicity() const override
|
||||
{
|
||||
return static_cast<bool>(monotonicity_for_range);
|
||||
|
@ -26,6 +26,9 @@ public:
|
||||
|
||||
String getName() const override { return "FunctionExpression"; }
|
||||
|
||||
bool isDeterministic() const override { return true; }
|
||||
bool isDeterministicInScopeOfQuery() const override { return true; }
|
||||
|
||||
const DataTypes & getArgumentTypes() const override { return argument_types; }
|
||||
const DataTypePtr & getReturnType() const override { return return_type; }
|
||||
|
||||
@ -110,6 +113,9 @@ public:
|
||||
|
||||
String getName() const override { return name; }
|
||||
|
||||
bool isDeterministic() const override { return true; }
|
||||
bool isDeterministicInScopeOfQuery() const override { return true; }
|
||||
|
||||
const DataTypes & getArgumentTypes() const override { return captured_types; }
|
||||
const DataTypePtr & getReturnType() const override { return return_type; }
|
||||
|
||||
|
@ -53,9 +53,11 @@ public:
|
||||
return name;
|
||||
}
|
||||
|
||||
bool isDeterministic() const override { return false; }
|
||||
bool isDeterministicInScopeOfQuery() const override { return false; }
|
||||
|
||||
bool isVariadic() const override { return true; }
|
||||
size_t getNumberOfArguments() const override { return 0; }
|
||||
bool isDeterministicInScopeOfQuery() const override { return false; }
|
||||
|
||||
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
|
||||
{
|
||||
|
@ -198,9 +198,9 @@ public:
|
||||
* Example: now(). Another example: functions that work with periodically updated dictionaries.
|
||||
*/
|
||||
|
||||
virtual bool isDeterministic() const { return true; }
|
||||
virtual bool isDeterministic() const = 0;
|
||||
|
||||
virtual bool isDeterministicInScopeOfQuery() const { return true; }
|
||||
virtual bool isDeterministicInScopeOfQuery() const = 0;
|
||||
|
||||
/** Lets you know if the function is monotonic in a range of values.
|
||||
* This is used to work with the index in a sorted chunk of data.
|
||||
@ -240,11 +240,16 @@ public:
|
||||
/// Get the main function name.
|
||||
virtual String getName() const = 0;
|
||||
|
||||
/// See the comment for the same method in IFunctionBase
|
||||
virtual bool isDeterministic() const = 0;
|
||||
|
||||
virtual bool isDeterministicInScopeOfQuery() const = 0;
|
||||
|
||||
/// Override and return true if function needs to depend on the state of the data.
|
||||
virtual bool isStateful() const { return false; }
|
||||
virtual bool isStateful() const = 0;
|
||||
|
||||
/// Override and return true if function could take different number of arguments.
|
||||
virtual bool isVariadic() const { return false; }
|
||||
virtual bool isVariadic() const = 0;
|
||||
|
||||
/// For non-variadic functions, return number of arguments; otherwise return zero (that should be ignored).
|
||||
virtual size_t getNumberOfArguments() const = 0;
|
||||
@ -277,6 +282,11 @@ public:
|
||||
return buildImpl(arguments, getReturnType(arguments));
|
||||
}
|
||||
|
||||
bool isDeterministic() const override { return true; }
|
||||
bool isDeterministicInScopeOfQuery() const override { return true; }
|
||||
bool isStateful() const override { return false; }
|
||||
bool isVariadic() const override { return false; }
|
||||
|
||||
/// Default implementation. Will check only in non-variadic case.
|
||||
void checkNumberOfArguments(size_t number_of_arguments) const override;
|
||||
|
||||
@ -357,6 +367,8 @@ public:
|
||||
ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {}; }
|
||||
bool canBeExecutedOnDefaultArguments() const override { return true; }
|
||||
bool canBeExecutedOnLowCardinalityDictionary() const override { return isDeterministicInScopeOfQuery(); }
|
||||
bool isDeterministic() const override { return true; }
|
||||
bool isDeterministicInScopeOfQuery() const override { return true; }
|
||||
|
||||
using PreparedFunctionImpl::execute;
|
||||
using PreparedFunctionImpl::executeImplDryRun;
|
||||
@ -506,6 +518,9 @@ public:
|
||||
return function->checkNumberOfArguments(number_of_arguments);
|
||||
}
|
||||
|
||||
bool isDeterministic() const override { return function->isDeterministic(); }
|
||||
bool isDeterministicInScopeOfQuery() const override { return function->isDeterministicInScopeOfQuery(); }
|
||||
|
||||
String getName() const override { return function->getName(); }
|
||||
bool isStateful() const override { return function->isStateful(); }
|
||||
bool isVariadic() const override { return function->isVariadic(); }
|
||||
|
@ -53,6 +53,7 @@ public:
|
||||
}
|
||||
|
||||
bool isDeterministic() const override { return false; }
|
||||
bool isDeterministicInScopeOfQuery() const override { return true; }
|
||||
|
||||
private:
|
||||
time_t time_value;
|
||||
@ -65,8 +66,10 @@ public:
|
||||
static constexpr auto name = "now";
|
||||
|
||||
String getName() const override { return name; }
|
||||
size_t getNumberOfArguments() const override { return 0; }
|
||||
|
||||
bool isDeterministic() const override { return false; }
|
||||
|
||||
size_t getNumberOfArguments() const override { return 0; }
|
||||
static FunctionBuilderPtr create(const Context &) { return std::make_shared<FunctionBuilderNow>(); }
|
||||
|
||||
protected:
|
||||
|
@ -50,6 +50,7 @@ public:
|
||||
}
|
||||
|
||||
bool isDeterministic() const override { return false; }
|
||||
bool isDeterministicInScopeOfQuery() const override { return true; }
|
||||
|
||||
private:
|
||||
ToType value;
|
||||
@ -64,6 +65,8 @@ public:
|
||||
static constexpr auto name = Name::name;
|
||||
String getName() const override { return name; }
|
||||
|
||||
bool isDeterministic() const override { return false; }
|
||||
|
||||
bool isVariadic() const override { return true; }
|
||||
size_t getNumberOfArguments() const override { return 0; }
|
||||
|
||||
|
@ -141,6 +141,7 @@ public:
|
||||
return 1;
|
||||
}
|
||||
|
||||
bool isDeterministic() const override { return false; }
|
||||
bool isDeterministicInScopeOfQuery() const override
|
||||
{
|
||||
return false;
|
||||
|
@ -38,6 +38,9 @@ public:
|
||||
static constexpr auto name = "toTypeName";
|
||||
String getName() const override { return name; }
|
||||
|
||||
bool isDeterministic() const override { return true; }
|
||||
bool isDeterministicInScopeOfQuery() const override { return true; }
|
||||
|
||||
const DataTypes & getArgumentTypes() const override { return argument_types; }
|
||||
const DataTypePtr & getReturnType() const override { return return_type; }
|
||||
|
||||
|
@ -52,6 +52,7 @@ public:
|
||||
}
|
||||
|
||||
bool isDeterministic() const override { return false; }
|
||||
bool isDeterministicInScopeOfQuery() const override { return true; }
|
||||
|
||||
private:
|
||||
DayNum day_value;
|
||||
@ -64,6 +65,9 @@ public:
|
||||
static constexpr auto name = "today";
|
||||
|
||||
String getName() const override { return name; }
|
||||
|
||||
bool isDeterministic() const override { return false; }
|
||||
|
||||
size_t getNumberOfArguments() const override { return 0; }
|
||||
|
||||
static FunctionBuilderPtr create(const Context &) { return std::make_shared<FunctionBuilderToday>(); }
|
||||
|
@ -52,6 +52,7 @@ public:
|
||||
}
|
||||
|
||||
bool isDeterministic() const override { return false; }
|
||||
bool isDeterministicInScopeOfQuery() const override { return true; }
|
||||
|
||||
private:
|
||||
DayNum day_value;
|
||||
@ -64,6 +65,9 @@ public:
|
||||
static constexpr auto name = "yesterday";
|
||||
|
||||
String getName() const override { return name; }
|
||||
|
||||
bool isDeterministic() const override { return false; }
|
||||
|
||||
size_t getNumberOfArguments() const override { return 0; }
|
||||
|
||||
static FunctionBuilderPtr create(const Context &) { return std::make_shared<FunctionBuilderYesterday>(); }
|
||||
|
@ -1,6 +1,11 @@
|
||||
#include "MutationsInterpreter.h"
|
||||
|
||||
#include <Functions/FunctionFactory.h>
|
||||
#include <Functions/IFunction.h>
|
||||
#include <Interpreters/InDepthNodeVisitor.h>
|
||||
#include <Interpreters/InterpreterSelectQuery.h>
|
||||
#include <Interpreters/MutationsInterpreter.h>
|
||||
#include <Interpreters/SyntaxAnalyzer.h>
|
||||
#include <Interpreters/InterpreterSelectQuery.h>
|
||||
#include <Storages/MergeTree/MergeTreeData.h>
|
||||
#include <DataStreams/FilterBlockInputStream.h>
|
||||
#include <DataStreams/ExpressionBlockInputStream.h>
|
||||
@ -14,7 +19,6 @@
|
||||
#include <Parsers/ASTSelectQuery.h>
|
||||
#include <Parsers/formatAST.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include "MutationsInterpreter.h"
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -27,6 +31,67 @@ namespace ErrorCodes
|
||||
extern const int CANNOT_UPDATE_COLUMN;
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
struct FirstNonDeterministicFuncData
|
||||
{
|
||||
using TypeToVisit = ASTFunction;
|
||||
|
||||
explicit FirstNonDeterministicFuncData(const Context & context_)
|
||||
: context{context_}
|
||||
{}
|
||||
|
||||
const Context & context;
|
||||
std::optional<String> nondeterministic_function_name;
|
||||
|
||||
void visit(ASTFunction & function, ASTPtr &)
|
||||
{
|
||||
if (nondeterministic_function_name)
|
||||
return;
|
||||
|
||||
const auto func = FunctionFactory::instance().get(function.name, context);
|
||||
if (!func->isDeterministic())
|
||||
nondeterministic_function_name = func->getName();
|
||||
}
|
||||
};
|
||||
|
||||
using FirstNonDeterministicFuncFinder =
|
||||
InDepthNodeVisitor<OneTypeMatcher<FirstNonDeterministicFuncData>, true>;
|
||||
|
||||
std::optional<String> findFirstNonDeterministicFuncName(const MutationCommand & command, const Context & context)
|
||||
{
|
||||
FirstNonDeterministicFuncData finder_data(context);
|
||||
|
||||
switch (command.type)
|
||||
{
|
||||
case MutationCommand::UPDATE:
|
||||
{
|
||||
auto update_assignments_ast = command.ast->as<const ASTAlterCommand &>().update_assignments->clone();
|
||||
FirstNonDeterministicFuncFinder(finder_data).visit(update_assignments_ast);
|
||||
|
||||
if (finder_data.nondeterministic_function_name)
|
||||
return finder_data.nondeterministic_function_name;
|
||||
|
||||
[[fallthrough]];
|
||||
}
|
||||
|
||||
case MutationCommand::DELETE:
|
||||
{
|
||||
auto predicate_ast = command.predicate->clone();
|
||||
FirstNonDeterministicFuncFinder(finder_data).visit(predicate_ast);
|
||||
|
||||
return finder_data.nondeterministic_function_name;
|
||||
}
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return {};
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
bool MutationsInterpreter::isStorageTouchedByMutations() const
|
||||
{
|
||||
if (commands.empty())
|
||||
@ -440,6 +505,21 @@ BlockInputStreamPtr MutationsInterpreter::addStreamsForLaterStages(const std::ve
|
||||
|
||||
void MutationsInterpreter::validate(TableStructureReadLockHolder &)
|
||||
{
|
||||
/// For Replicated* storages mutations cannot employ non-deterministic functions
|
||||
/// because that produces inconsistencies between replicas
|
||||
if (startsWith(storage->getName(), "Replicated"))
|
||||
{
|
||||
for (const auto & command : commands)
|
||||
{
|
||||
const auto nondeterministic_func_name = findFirstNonDeterministicFuncName(command, context);
|
||||
if (nondeterministic_func_name)
|
||||
throw Exception(
|
||||
"ALTER UPDATE/ALTER DELETE statements must use only deterministic functions! "
|
||||
"Function '" + *nondeterministic_func_name + "' is non-deterministic",
|
||||
ErrorCodes::BAD_ARGUMENTS);
|
||||
}
|
||||
}
|
||||
|
||||
const auto & select_query = prepare(/* dry_run = */ true);
|
||||
InterpreterSelectQuery interpreter{select_query, context, storage, SelectQueryOptions().analyze(/* dry_run = */ true).ignoreLimits()};
|
||||
/// Do not use getSampleBlock in order to check the whole pipeline.
|
||||
|
@ -0,0 +1,4 @@
|
||||
OK
|
||||
OK
|
||||
OK
|
||||
OK
|
@ -0,0 +1,43 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
|
||||
. $CURDIR/../shell_config.sh
|
||||
|
||||
|
||||
R1=table_1017_1
|
||||
R2=table_1017_2
|
||||
T1=table_1017_merge
|
||||
|
||||
${CLICKHOUSE_CLIENT} -n -q "
|
||||
DROP TABLE IF EXISTS $R1;
|
||||
DROP TABLE IF EXISTS $R2;
|
||||
|
||||
CREATE TABLE $R1 (x UInt32, y UInt32) ENGINE ReplicatedMergeTree('/clickhouse/tables/${CLICKHOUSE_DATABASE}.table_1017', 'r1') ORDER BY x;
|
||||
CREATE TABLE $R2 (x UInt32, y UInt32) ENGINE ReplicatedMergeTree('/clickhouse/tables/${CLICKHOUSE_DATABASE}.table_1017', 'r2') ORDER BY x;
|
||||
CREATE TABLE $T1 (x UInt32, y UInt32) ENGINE MergeTree() ORDER BY x;
|
||||
|
||||
INSERT INTO $R1 VALUES (0, 1)(1, 2)(2, 3)(3, 4);
|
||||
INSERT INTO $T1 VALUES (0, 1)(1, 2)(2, 3)(3, 4);
|
||||
"
|
||||
|
||||
# Check that in mutations of replicated tables predicates do not contain non-deterministic functions
|
||||
${CLICKHOUSE_CLIENT} --query "ALTER TABLE $R1 DELETE WHERE ignore(rand())" 2>&1 \
|
||||
| fgrep -q "must use only deterministic functions" && echo 'OK' || echo 'FAIL'
|
||||
|
||||
${CLICKHOUSE_CLIENT} --query "ALTER TABLE $R1 UPDATE y = y + rand() % 1 WHERE not ignore()" 2>&1 \
|
||||
| fgrep -q "must use only deterministic functions" && echo 'OK' || echo 'FAIL'
|
||||
|
||||
|
||||
# For regular tables we do not enforce deterministic functions
|
||||
${CLICKHOUSE_CLIENT} --query "ALTER TABLE $T1 DELETE WHERE rand() = 0" 2>&1 > /dev/null \
|
||||
&& echo 'OK' || echo 'FAIL'
|
||||
|
||||
${CLICKHOUSE_CLIENT} --query "ALTER TABLE $T1 UPDATE y = y + rand() % 1 WHERE not ignore()" 2>&1 > /dev/null \
|
||||
&& echo 'OK' || echo 'FAIL'
|
||||
|
||||
|
||||
${CLICKHOUSE_CLIENT} -n -q "
|
||||
DROP TABLE IF EXISTS $R2;
|
||||
DROP TABLE IF EXISTS $R1;
|
||||
DROP TABLE IF EXISTS $T1;
|
||||
"
|
Loading…
Reference in New Issue
Block a user