Don't allow suspicious variant types by default, add notes in documentation

This commit is contained in:
avogar 2024-02-23 20:48:38 +00:00
parent 39ad054e94
commit 8fa75a21c5
13 changed files with 82 additions and 17 deletions

View File

@ -12,6 +12,11 @@ has a value of either type `T1` or `T2` or ... or `TN` or none of them (`NULL` v
The order of nested types doesn't matter: Variant(T1, T2) = Variant(T2, T1).
Nested types can be arbitrary types except Nullable(...), LowCardinality(Nullable(...)) and Variant(...) types.
:::note
It's not recommended to use similar types as variants (for example different numeric types like `Variant(UInt32, Int64)` or different date types like `Variant(Date, DateTime)`),
because working with values of such types can lead to ambiguity. By default, creating such `Variant` type will lead to an exception, but can be enabled using setting `allow_suspicious_variant_types`
:::
:::note
The Variant data type is an experimental feature. To use it, set `allow_experimental_variant_type = 1`.
:::
@ -371,4 +376,22 @@ SELECT v2, v2.`Array(UInt32)`, variantType(v2) FROM test WHERE variantType(v2) =
└────┴──────────────────┴─────────────────┘
```
**Note:** values of variants with different numeric types are considered as different variants and not compared between each other, their type names are compared instead.
Example:
```sql
SET allow_suspicious_variant_types = 1;
CREATE TABLE test (v Variant(UInt32, Int64)) ENGINE=Memory;
INSERT INTO test VALUES (1::UInt32), (1::Int64), (100::UInt32), (100::Int64);
SELECT v, variantType(v) FROM test ORDER by v;
```
```text
┌─v───┬─variantType(v)─┐
│ 1 │ Int64 │
│ 100 │ Int64 │
│ 1 │ UInt32 │
│ 100 │ UInt32 │
└─────┴────────────────┘
```

View File

@ -160,6 +160,7 @@ class IColumn;
M(Bool, allow_suspicious_fixed_string_types, false, "In CREATE TABLE statement allows creating columns of type FixedString(n) with n > 256. FixedString with length >= 256 is suspicious and most likely indicates misuse", 0) \
M(Bool, allow_suspicious_indices, false, "Reject primary/secondary indexes and sorting keys with identical expressions", 0) \
M(Bool, allow_suspicious_ttl_expressions, false, "Reject TTL expressions that don't depend on any of table's columns. It indicates a user error most of the time.", 0) \
M(Bool, allow_suspicious_variant_types, false, "In CREATE TABLE statement allows specifying Variant type with similar variant types (for example, with different numeric or date types). Enabling this setting may introduce some ambiguity when working with values with similar types.", 0) \
M(Bool, compile_expressions, false, "Compile some scalar functions and operators to native code.", 0) \
M(UInt64, min_count_to_compile_expression, 3, "The number of identical expressions before they are JIT-compiled", 0) \
M(Bool, compile_aggregate_expressions, true, "Compile aggregate functions to native code.", 0) \

View File

@ -1,21 +1,23 @@
#include <DataTypes/DataTypeFixedString.h>
#include <DataTypes/DataTypeLowCardinality.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypeVariant.h>
#include <DataTypes/getLeastSupertype.h>
#include <Interpreters/Context.h>
#include <Interpreters/InterpreterCreateQuery.h>
#include <Interpreters/parseColumnsListForTableFunction.h>
#include <Parsers/ASTExpressionList.h>
#include <Parsers/ParserCreateQuery.h>
#include <Parsers/parseQuery.h>
#include <Interpreters/InterpreterCreateQuery.h>
#include <Interpreters/Context.h>
#include <Interpreters/parseColumnsListForTableFunction.h>
#include <DataTypes/DataTypeLowCardinality.h>
#include <DataTypes/DataTypeFixedString.h>
#include <DataTypes/DataTypeNullable.h>
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
extern const int SUSPICIOUS_TYPE_FOR_LOW_CARDINALITY;
extern const int ILLEGAL_COLUMN;
extern const int LOGICAL_ERROR;
extern const int SUSPICIOUS_TYPE_FOR_LOW_CARDINALITY;
extern const int ILLEGAL_COLUMN;
}
@ -73,6 +75,34 @@ void validateDataType(const DataTypePtr & type_to_check, const DataTypeValidatio
data_type.getName());
}
}
if (!settings.allow_suspicious_variant_types)
{
if (const auto * variant_type = typeid_cast<const DataTypeVariant *>(&data_type))
{
const auto & variants = variant_type->getVariants();
chassert(!variants.empty());
for (size_t i = 0; i < variants.size() - 1; ++i)
{
for (size_t j = i + 1; j < variants.size(); ++j)
{
if (auto supertype = tryGetLeastSupertype(DataTypes{variants[i], variants[j]}))
{
throw Exception(
ErrorCodes::ILLEGAL_COLUMN,
"Cannot create column with type '{}' because variants '{}' and '{}' have similar types and working with values "
"of these types may lead to ambiguity. "
"Consider using common single variant '{}' instead of these 2 variants or set setting allow_suspicious_variant_types = 1 "
"in order to allow it",
data_type.getName(),
variants[i]->getName(),
variants[j]->getName(),
supertype->getName());
}
}
}
}
}
};
validate_callback(*type_to_check);
@ -104,7 +134,8 @@ bool tryParseColumnsListFromString(const std::string & structure, ColumnsDescrip
const char * start = structure.data();
const char * end = structure.data() + structure.size();
ASTPtr columns_list_raw = tryParseQuery(parser, start, end, error, false, "columns declaration list", false, settings.max_query_size, settings.max_parser_depth);
ASTPtr columns_list_raw = tryParseQuery(
parser, start, end, error, false, "columns declaration list", false, settings.max_query_size, settings.max_parser_depth);
if (!columns_list_raw)
return false;

View File

@ -19,6 +19,7 @@ struct DataTypeValidationSettings
, allow_experimental_object_type(settings.allow_experimental_object_type)
, allow_suspicious_fixed_string_types(settings.allow_suspicious_fixed_string_types)
, allow_experimental_variant_type(settings.allow_experimental_variant_type)
, allow_suspicious_variant_types(settings.allow_suspicious_variant_types)
{
}
@ -26,6 +27,7 @@ struct DataTypeValidationSettings
bool allow_experimental_object_type = true;
bool allow_suspicious_fixed_string_types = true;
bool allow_experimental_variant_type = true;
bool allow_suspicious_variant_types = true;
};
void validateDataType(const DataTypePtr & type, const DataTypeValidationSettings & settings);

View File

@ -1,4 +1,5 @@
set allow_experimental_variant_type = 1;
set allow_suspicious_variant_types = 1;
set session_timezone = 'UTC';
select 'JSON';
@ -263,4 +264,4 @@ select v, variantElement(v, 'Array(LowCardinality(Nullable(String)))') from form
select 'Nullable';
select v, variantElement(v, 'Array(Nullable(String))') from format(Values, 'v Variant(String, Array(Nullable(String)))', '(NULL), (''string''), ([''hello'', null, ''world''])') format Values;
select '';
select '';

View File

@ -7,7 +7,7 @@ CLICKHOUSE_LOG_COMMENT=
# shellcheck source=../shell_config.sh
. "$CUR_DIR"/../shell_config.sh
CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1"
CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --allow_suspicious_variant_types=1"
function test1_insert()
{

View File

@ -7,7 +7,7 @@ CLICKHOUSE_LOG_COMMENT=
# shellcheck source=../shell_config.sh
. "$CUR_DIR"/../shell_config.sh
CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1"
CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --allow_suspicious_variant_types=1"
function test4_insert()
{

View File

@ -7,7 +7,7 @@ CLICKHOUSE_LOG_COMMENT=
# shellcheck source=../shell_config.sh
. "$CUR_DIR"/../shell_config.sh
CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1"
CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --allow_suspicious_variant_types=1"
function test5_insert()
{

View File

@ -7,7 +7,7 @@ CLICKHOUSE_LOG_COMMENT=
# shellcheck source=../shell_config.sh
. "$CUR_DIR"/../shell_config.sh
CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1"
CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --allow_suspicious_variant_types=1"
function test6_insert()
{

View File

@ -7,7 +7,7 @@ CLICKHOUSE_LOG_COMMENT=
# shellcheck source=../shell_config.sh
. "$CUR_DIR"/../shell_config.sh
CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --use_variant_as_common_type=1 "
CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --allow_suspicious_variant_types=1"
function test()

View File

@ -7,7 +7,7 @@ CLICKHOUSE_LOG_COMMENT=
# shellcheck source=../shell_config.sh
. "$CUR_DIR"/../shell_config.sh
CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --use_variant_as_common_type=1 "
CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --allow_suspicious_variant_types=1"
function test1_insert()

View File

@ -0,0 +1,7 @@
set allow_suspicious_variant_types=0;
select 42::Variant(UInt32, Int64); -- {serverError ILLEGAL_COLUMN}
select [42]::Variant(Array(UInt32), Array(Int64)); -- {serverError ILLEGAL_COLUMN}
select 'Hello'::Variant(String, LowCardinality(String)); -- {serverError ILLEGAL_COLUMN}
select (1, 'Hello')::Variant(Tuple(UInt32, String), Tuple(Int64, String)); -- {serverError ILLEGAL_COLUMN}
select map(42, 42)::Variant(Map(UInt64, UInt32), Map(UInt64, Int64)); -- {serverError ILLEGAL_COLUMN}