Prevent from using non-deterministic functions in mutations of replicated tables (#7247)

* Added isDeterministic() to IFunctionBuilder i-face
* A test for non-deterministic mutations
* In MutationsInterpreter::validate() ensure deterministic functions
* Adjusted nondeterministic mutation tests
* Moved around some code in MutationsInterpreter::validate()
* Dropped unnecessary check in findFirstNonDeterministicFuncName()
* Corrections to IFunction{Base,Builder} interface
This commit is contained in:
Alexander Kazakov 2019-10-10 17:38:08 +03:00 committed by GitHub
parent 0769f80f4e
commit 03c7b7ff8d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
13 changed files with 179 additions and 8 deletions

View File

@ -1540,6 +1540,9 @@ public:
String getName() const override { return name; }
bool isDeterministic() const override { return true; }
bool isDeterministicInScopeOfQuery() const override { return true; }
bool hasInformationAboutMonotonicity() const override
{
return static_cast<bool>(monotonicity_for_range);

View File

@ -26,6 +26,9 @@ public:
String getName() const override { return "FunctionExpression"; }
bool isDeterministic() const override { return true; }
bool isDeterministicInScopeOfQuery() const override { return true; }
const DataTypes & getArgumentTypes() const override { return argument_types; }
const DataTypePtr & getReturnType() const override { return return_type; }
@ -110,6 +113,9 @@ public:
String getName() const override { return name; }
bool isDeterministic() const override { return true; }
bool isDeterministicInScopeOfQuery() const override { return true; }
const DataTypes & getArgumentTypes() const override { return captured_types; }
const DataTypePtr & getReturnType() const override { return return_type; }

View File

@ -53,9 +53,11 @@ public:
return name;
}
bool isDeterministic() const override { return false; }
bool isDeterministicInScopeOfQuery() const override { return false; }
bool isVariadic() const override { return true; }
size_t getNumberOfArguments() const override { return 0; }
bool isDeterministicInScopeOfQuery() const override { return false; }
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
{

View File

@ -198,9 +198,9 @@ public:
* Example: now(). Another example: functions that work with periodically updated dictionaries.
*/
virtual bool isDeterministic() const { return true; }
virtual bool isDeterministic() const = 0;
virtual bool isDeterministicInScopeOfQuery() const { return true; }
virtual bool isDeterministicInScopeOfQuery() const = 0;
/** Lets you know if the function is monotonic in a range of values.
* This is used to work with the index in a sorted chunk of data.
@ -240,11 +240,16 @@ public:
/// Get the main function name.
virtual String getName() const = 0;
/// See the comment for the same method in IFunctionBase
virtual bool isDeterministic() const = 0;
virtual bool isDeterministicInScopeOfQuery() const = 0;
/// Override and return true if function needs to depend on the state of the data.
virtual bool isStateful() const { return false; }
virtual bool isStateful() const = 0;
/// Override and return true if function could take different number of arguments.
virtual bool isVariadic() const { return false; }
virtual bool isVariadic() const = 0;
/// For non-variadic functions, return number of arguments; otherwise return zero (that should be ignored).
virtual size_t getNumberOfArguments() const = 0;
@ -277,6 +282,11 @@ public:
return buildImpl(arguments, getReturnType(arguments));
}
bool isDeterministic() const override { return true; }
bool isDeterministicInScopeOfQuery() const override { return true; }
bool isStateful() const override { return false; }
bool isVariadic() const override { return false; }
/// Default implementation. Will check only in non-variadic case.
void checkNumberOfArguments(size_t number_of_arguments) const override;
@ -357,6 +367,8 @@ public:
ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {}; }
bool canBeExecutedOnDefaultArguments() const override { return true; }
bool canBeExecutedOnLowCardinalityDictionary() const override { return isDeterministicInScopeOfQuery(); }
bool isDeterministic() const override { return true; }
bool isDeterministicInScopeOfQuery() const override { return true; }
using PreparedFunctionImpl::execute;
using PreparedFunctionImpl::executeImplDryRun;
@ -506,6 +518,9 @@ public:
return function->checkNumberOfArguments(number_of_arguments);
}
bool isDeterministic() const override { return function->isDeterministic(); }
bool isDeterministicInScopeOfQuery() const override { return function->isDeterministicInScopeOfQuery(); }
String getName() const override { return function->getName(); }
bool isStateful() const override { return function->isStateful(); }
bool isVariadic() const override { return function->isVariadic(); }

View File

@ -53,6 +53,7 @@ public:
}
bool isDeterministic() const override { return false; }
bool isDeterministicInScopeOfQuery() const override { return true; }
private:
time_t time_value;
@ -65,8 +66,10 @@ public:
static constexpr auto name = "now";
String getName() const override { return name; }
size_t getNumberOfArguments() const override { return 0; }
bool isDeterministic() const override { return false; }
size_t getNumberOfArguments() const override { return 0; }
static FunctionBuilderPtr create(const Context &) { return std::make_shared<FunctionBuilderNow>(); }
protected:

View File

@ -50,6 +50,7 @@ public:
}
bool isDeterministic() const override { return false; }
bool isDeterministicInScopeOfQuery() const override { return true; }
private:
ToType value;
@ -64,6 +65,8 @@ public:
static constexpr auto name = Name::name;
String getName() const override { return name; }
bool isDeterministic() const override { return false; }
bool isVariadic() const override { return true; }
size_t getNumberOfArguments() const override { return 0; }

View File

@ -141,6 +141,7 @@ public:
return 1;
}
bool isDeterministic() const override { return false; }
bool isDeterministicInScopeOfQuery() const override
{
return false;

View File

@ -38,6 +38,9 @@ public:
static constexpr auto name = "toTypeName";
String getName() const override { return name; }
bool isDeterministic() const override { return true; }
bool isDeterministicInScopeOfQuery() const override { return true; }
const DataTypes & getArgumentTypes() const override { return argument_types; }
const DataTypePtr & getReturnType() const override { return return_type; }

View File

@ -52,6 +52,7 @@ public:
}
bool isDeterministic() const override { return false; }
bool isDeterministicInScopeOfQuery() const override { return true; }
private:
DayNum day_value;
@ -64,6 +65,9 @@ public:
static constexpr auto name = "today";
String getName() const override { return name; }
bool isDeterministic() const override { return false; }
size_t getNumberOfArguments() const override { return 0; }
static FunctionBuilderPtr create(const Context &) { return std::make_shared<FunctionBuilderToday>(); }

View File

@ -52,6 +52,7 @@ public:
}
bool isDeterministic() const override { return false; }
bool isDeterministicInScopeOfQuery() const override { return true; }
private:
DayNum day_value;
@ -64,6 +65,9 @@ public:
static constexpr auto name = "yesterday";
String getName() const override { return name; }
bool isDeterministic() const override { return false; }
size_t getNumberOfArguments() const override { return 0; }
static FunctionBuilderPtr create(const Context &) { return std::make_shared<FunctionBuilderYesterday>(); }

View File

@ -1,6 +1,11 @@
#include "MutationsInterpreter.h"
#include <Functions/FunctionFactory.h>
#include <Functions/IFunction.h>
#include <Interpreters/InDepthNodeVisitor.h>
#include <Interpreters/InterpreterSelectQuery.h>
#include <Interpreters/MutationsInterpreter.h>
#include <Interpreters/SyntaxAnalyzer.h>
#include <Interpreters/InterpreterSelectQuery.h>
#include <Storages/MergeTree/MergeTreeData.h>
#include <DataStreams/FilterBlockInputStream.h>
#include <DataStreams/ExpressionBlockInputStream.h>
@ -14,7 +19,6 @@
#include <Parsers/ASTSelectQuery.h>
#include <Parsers/formatAST.h>
#include <IO/WriteHelpers.h>
#include "MutationsInterpreter.h"
namespace DB
@ -27,6 +31,67 @@ namespace ErrorCodes
extern const int CANNOT_UPDATE_COLUMN;
}
namespace
{
struct FirstNonDeterministicFuncData
{
using TypeToVisit = ASTFunction;
explicit FirstNonDeterministicFuncData(const Context & context_)
: context{context_}
{}
const Context & context;
std::optional<String> nondeterministic_function_name;
void visit(ASTFunction & function, ASTPtr &)
{
if (nondeterministic_function_name)
return;
const auto func = FunctionFactory::instance().get(function.name, context);
if (!func->isDeterministic())
nondeterministic_function_name = func->getName();
}
};
using FirstNonDeterministicFuncFinder =
InDepthNodeVisitor<OneTypeMatcher<FirstNonDeterministicFuncData>, true>;
std::optional<String> findFirstNonDeterministicFuncName(const MutationCommand & command, const Context & context)
{
FirstNonDeterministicFuncData finder_data(context);
switch (command.type)
{
case MutationCommand::UPDATE:
{
auto update_assignments_ast = command.ast->as<const ASTAlterCommand &>().update_assignments->clone();
FirstNonDeterministicFuncFinder(finder_data).visit(update_assignments_ast);
if (finder_data.nondeterministic_function_name)
return finder_data.nondeterministic_function_name;
[[fallthrough]];
}
case MutationCommand::DELETE:
{
auto predicate_ast = command.predicate->clone();
FirstNonDeterministicFuncFinder(finder_data).visit(predicate_ast);
return finder_data.nondeterministic_function_name;
}
default:
break;
}
return {};
}
};
bool MutationsInterpreter::isStorageTouchedByMutations() const
{
if (commands.empty())
@ -440,6 +505,21 @@ BlockInputStreamPtr MutationsInterpreter::addStreamsForLaterStages(const std::ve
void MutationsInterpreter::validate(TableStructureReadLockHolder &)
{
/// For Replicated* storages mutations cannot employ non-deterministic functions
/// because that produces inconsistencies between replicas
if (startsWith(storage->getName(), "Replicated"))
{
for (const auto & command : commands)
{
const auto nondeterministic_func_name = findFirstNonDeterministicFuncName(command, context);
if (nondeterministic_func_name)
throw Exception(
"ALTER UPDATE/ALTER DELETE statements must use only deterministic functions! "
"Function '" + *nondeterministic_func_name + "' is non-deterministic",
ErrorCodes::BAD_ARGUMENTS);
}
}
const auto & select_query = prepare(/* dry_run = */ true);
InterpreterSelectQuery interpreter{select_query, context, storage, SelectQueryOptions().analyze(/* dry_run = */ true).ignoreLimits()};
/// Do not use getSampleBlock in order to check the whole pipeline.

View File

@ -0,0 +1,43 @@
#!/usr/bin/env bash
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
. $CURDIR/../shell_config.sh
R1=table_1017_1
R2=table_1017_2
T1=table_1017_merge
${CLICKHOUSE_CLIENT} -n -q "
DROP TABLE IF EXISTS $R1;
DROP TABLE IF EXISTS $R2;
CREATE TABLE $R1 (x UInt32, y UInt32) ENGINE ReplicatedMergeTree('/clickhouse/tables/${CLICKHOUSE_DATABASE}.table_1017', 'r1') ORDER BY x;
CREATE TABLE $R2 (x UInt32, y UInt32) ENGINE ReplicatedMergeTree('/clickhouse/tables/${CLICKHOUSE_DATABASE}.table_1017', 'r2') ORDER BY x;
CREATE TABLE $T1 (x UInt32, y UInt32) ENGINE MergeTree() ORDER BY x;
INSERT INTO $R1 VALUES (0, 1)(1, 2)(2, 3)(3, 4);
INSERT INTO $T1 VALUES (0, 1)(1, 2)(2, 3)(3, 4);
"
# Check that in mutations of replicated tables predicates do not contain non-deterministic functions
${CLICKHOUSE_CLIENT} --query "ALTER TABLE $R1 DELETE WHERE ignore(rand())" 2>&1 \
| fgrep -q "must use only deterministic functions" && echo 'OK' || echo 'FAIL'
${CLICKHOUSE_CLIENT} --query "ALTER TABLE $R1 UPDATE y = y + rand() % 1 WHERE not ignore()" 2>&1 \
| fgrep -q "must use only deterministic functions" && echo 'OK' || echo 'FAIL'
# For regular tables we do not enforce deterministic functions
${CLICKHOUSE_CLIENT} --query "ALTER TABLE $T1 DELETE WHERE rand() = 0" 2>&1 > /dev/null \
&& echo 'OK' || echo 'FAIL'
${CLICKHOUSE_CLIENT} --query "ALTER TABLE $T1 UPDATE y = y + rand() % 1 WHERE not ignore()" 2>&1 > /dev/null \
&& echo 'OK' || echo 'FAIL'
${CLICKHOUSE_CLIENT} -n -q "
DROP TABLE IF EXISTS $R2;
DROP TABLE IF EXISTS $R1;
DROP TABLE IF EXISTS $T1;
"