Merge pull request #46171 from Avogar/insert-null-as-default

Use the default value of the column type in `insert_null_as_default` if the column's DEFAULT value is not specified
This commit is contained in:
Kruglov Pavel 2023-02-20 21:45:02 +01:00 committed by GitHub
commit b0424c1021
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
23 changed files with 111 additions and 21 deletions

View File

@ -310,6 +310,13 @@ MutableColumnPtr ColumnLowCardinality::cloneResized(size_t size) const
return ColumnLowCardinality::create(IColumn::mutate(std::move(unique_ptr)), getIndexes().cloneResized(size));
}
/// Produces a finalized clone of this column and applies nestedToNullable() to the clone,
/// leaving the current column untouched.
MutableColumnPtr ColumnLowCardinality::cloneNullable() const
{
    auto nullable_copy = cloneFinalized();

    /// The clone is handled through a generic column pointer; cast it back to reach the LowCardinality API.
    auto & low_cardinality_copy = assert_cast<ColumnLowCardinality &>(*nullable_copy);
    low_cardinality_copy.nestedToNullable();

    return nullable_copy;
}
int ColumnLowCardinality::compareAtImpl(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint, const Collator * collator) const
{
const auto & low_cardinality_column = assert_cast<const ColumnLowCardinality &>(rhs);
@ -830,4 +837,11 @@ void ColumnLowCardinality::Dictionary::compact(ColumnPtr & positions)
shared = false;
}
bool isColumnLowCardinalityNullable(const IColumn & column)
{
if (const auto * lc_column = checkAndGetColumn<ColumnLowCardinality>(column))
return lc_column->nestedIsNullable();
return false;
}
}

View File

@ -219,6 +219,7 @@ public:
bool nestedCanBeInsideNullable() const { return dictionary.getColumnUnique().getNestedColumn()->canBeInsideNullable(); }
void nestedToNullable() { dictionary.getColumnUnique().nestedToNullable(); }
void nestedRemoveNullable() { dictionary.getColumnUnique().nestedRemoveNullable(); }
/// Returns a finalized clone of this column on which nestedToNullable() has been applied.
/// The column itself is not modified.
MutableColumnPtr cloneNullable() const;
const IColumnUnique & getDictionary() const { return dictionary.getColumnUnique(); }
IColumnUnique & getDictionary() { return dictionary.getColumnUnique(); }
@ -360,5 +361,7 @@ private:
void getPermutationImpl(IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability, size_t limit, int nan_direction_hint, Permutation & res, const Collator * collator = nullptr) const;
};
/// Returns true if `column` is a ColumnLowCardinality whose nested column is nullable
/// (as reported by nestedIsNullable()); returns false for every other column.
bool isColumnLowCardinalityNullable(const IColumn & column);
}

View File

@ -8,6 +8,7 @@
#include <Columns/ColumnConst.h>
#include <Columns/ColumnString.h>
#include <Columns/ColumnCompressed.h>
#include <Columns/ColumnLowCardinality.h>
#include <Processors/Transforms/ColumnGathererTransform.h>
#if USE_EMBEDDED_COMPILER
@ -792,6 +793,23 @@ ColumnPtr makeNullable(const ColumnPtr & column)
return ColumnNullable::create(column, ColumnUInt8::create(column->size(), 0));
}
/// Makes a column nullable, keeping LowCardinality as the outer wrapper when the column is LowCardinality.
///
/// - Nullable and LowCardinality(Nullable) columns are returned unchanged.
/// - Const columns are rebuilt around a converted data column, keeping the same size.
/// - LowCardinality columns are converted with cloneNullable() instead of being wrapped into ColumnNullable.
/// - Any other column is wrapped into ColumnNullable with an all-zero null map (no NULL values).
ColumnPtr makeNullableOrLowCardinalityNullable(const ColumnPtr & column)
{
    if (isColumnNullable(*column) || isColumnLowCardinalityNullable(*column))
        return column;

    if (isColumnConst(*column))
    {
        /// Recurse instead of calling makeNullable(): for a constant whose data column is LowCardinality,
        /// makeNullable() would wrap the LowCardinality column into ColumnNullable, whereas the LowCardinality
        /// branch below must be applied to keep the result consistent with the DataTypePtr overload.
        const auto & const_column = assert_cast<const ColumnConst &>(*column);
        return ColumnConst::create(makeNullableOrLowCardinalityNullable(const_column.getDataColumnPtr()), column->size());
    }

    if (column->lowCardinality())
        return assert_cast<const ColumnLowCardinality &>(*column).cloneNullable();

    return ColumnNullable::create(column, ColumnUInt8::create(column->size(), 0));
}
ColumnPtr makeNullableSafe(const ColumnPtr & column)
{
if (isColumnNullable(*column))

View File

@ -220,5 +220,6 @@ private:
ColumnPtr makeNullable(const ColumnPtr & column);
ColumnPtr makeNullableSafe(const ColumnPtr & column);
/// Returns `column` unchanged if it is already Nullable or LowCardinality with a nullable nested column.
/// Otherwise returns a nullable version of it: a LowCardinality column is converted via cloneNullable()
/// rather than wrapped into ColumnNullable, and a Const column is rebuilt around a nullable data column.
ColumnPtr makeNullableOrLowCardinalityNullable(const ColumnPtr & column);
}

View File

@ -2,6 +2,7 @@
#include <DataTypes/DataTypeNothing.h>
#include <DataTypes/DataTypeFactory.h>
#include <DataTypes/Serializations/SerializationNullable.h>
#include <DataTypes/DataTypeLowCardinality.h>
#include <Columns/ColumnNullable.h>
#include <Core/Field.h>
#include <Parsers/IAST.h>
@ -99,4 +100,19 @@ DataTypePtr removeNullable(const DataTypePtr & type)
return type;
}
/// Type-level counterpart of the column conversion: returns `type` as is when it is already
/// Nullable or LowCardinality(Nullable), rebuilds LowCardinality around a nullable dictionary type,
/// and wraps every other type into DataTypeNullable.
DataTypePtr makeNullableOrLowCardinalityNullable(const DataTypePtr & type)
{
    if (isNullableOrLowCardinalityNullable(type))
        return type;

    /// Plain (non-LowCardinality) types are simply wrapped.
    if (!type->lowCardinality())
        return std::make_shared<DataTypeNullable>(type);

    /// For LowCardinality the nullability goes inside: LowCardinality(T) -> LowCardinality(Nullable(T)).
    const auto & low_cardinality_type = assert_cast<const DataTypeLowCardinality &>(*type);
    return std::make_shared<DataTypeLowCardinality>(makeNullable(low_cardinality_type.getDictionaryType()));
}
}

View File

@ -53,5 +53,6 @@ private:
DataTypePtr makeNullable(const DataTypePtr & type);
DataTypePtr makeNullableSafe(const DataTypePtr & type);
DataTypePtr removeNullable(const DataTypePtr & type);
/// Returns `type` unchanged if it is already Nullable or LowCardinality(Nullable).
/// For a LowCardinality type returns LowCardinality over the nullable dictionary type;
/// for any other type returns it wrapped into DataTypeNullable.
DataTypePtr makeNullableOrLowCardinalityNullable(const DataTypePtr & type);
}

View File

@ -392,7 +392,7 @@ struct WhichDataType
constexpr bool isAggregateFunction() const { return idx == TypeIndex::AggregateFunction; }
constexpr bool isSimple() const { return isInt() || isUInt() || isFloat() || isString(); }
constexpr bool isLowCarnality() const { return idx == TypeIndex::LowCardinality; }
constexpr bool isLowCardinality() const { return idx == TypeIndex::LowCardinality; }
};
/// IDataType helpers (alternative for IDataType virtual methods with single point of truth)
@ -548,6 +548,11 @@ inline bool isAggregateFunction(const DataTypePtr & data_type)
return which.isAggregateFunction();
}
/// True when the type reports itself as nullable, either directly (isNullable())
/// or as a LowCardinality over a nullable type (isLowCardinalityNullable()).
inline bool isNullableOrLowCardinalityNullable(const DataTypePtr & data_type)
{
    if (data_type->isNullable())
        return true;

    return data_type->isLowCardinalityNullable();
}
template <typename DataType> constexpr bool IsDataTypeDecimal = false;
template <typename DataType> constexpr bool IsDataTypeNumber = false;
template <typename DataType> constexpr bool IsDataTypeDateOrDateTime = false;

View File

@ -8,7 +8,7 @@ namespace DB
bool hasNullable(const DataTypePtr & type)
{
if (type->isNullable() || type->isLowCardinalityNullable())
if (isNullableOrLowCardinalityNullable(type))
return true;
if (const DataTypeArray * type_array = typeid_cast<const DataTypeArray *>(type.get()))

View File

@ -104,7 +104,7 @@ bool deserializeFieldByEscapingRule(
const FormatSettings & format_settings)
{
bool read = true;
bool parse_as_nullable = format_settings.null_as_default && !type->isNullable() && !type->isLowCardinalityNullable();
bool parse_as_nullable = format_settings.null_as_default && !isNullableOrLowCardinalityNullable(type);
switch (escaping_rule)
{
case FormatSettings::EscapingRule::Escaped:

View File

@ -198,7 +198,7 @@ namespace JSONUtils
{
try
{
bool as_nullable = format_settings.null_as_default && !type->isNullable() && !type->isLowCardinalityNullable();
bool as_nullable = format_settings.null_as_default && !isNullableOrLowCardinalityNullable(type);
if (yield_strings)
{

View File

@ -1084,7 +1084,7 @@ DataTypePtr makeNullableRecursively(DataTypePtr type)
return key_type && value_type ? std::make_shared<DataTypeMap>(removeNullable(key_type), value_type) : nullptr;
}
if (which.isLowCarnality())
if (which.isLowCardinality())
{
const auto * lc_type = assert_cast<const DataTypeLowCardinality *>(type.get());
auto nested_type = makeNullableRecursively(lc_type->getDictionaryType());

View File

@ -449,8 +449,8 @@ BlockIO InterpreterInsertQuery::execute()
{
/// Change query sample block columns to Nullable to allow inserting nullable columns, where NULL values will be substituted with
/// default column values (in AddingDefaultsTransform), so all values will be cast correctly.
if (input_columns[col_idx].type->isNullable() && !query_columns[col_idx].type->isNullable() && output_columns.hasDefault(query_columns[col_idx].name))
query_sample_block.setColumn(col_idx, ColumnWithTypeAndName(makeNullable(query_columns[col_idx].column), makeNullable(query_columns[col_idx].type), query_columns[col_idx].name));
if (isNullableOrLowCardinalityNullable(input_columns[col_idx].type) && !isNullableOrLowCardinalityNullable(query_columns[col_idx].type) && output_columns.has(query_columns[col_idx].name))
query_sample_block.setColumn(col_idx, ColumnWithTypeAndName(makeNullableOrLowCardinalityNullable(query_columns[col_idx].column), makeNullableOrLowCardinalityNullable(query_columns[col_idx].type), query_columns[col_idx].name));
}
}
}

View File

@ -44,7 +44,7 @@ void addDefaultRequiredExpressionsRecursively(
bool convert_null_to_default = false;
if (is_column_in_query)
convert_null_to_default = null_as_default && block.findByName(required_column_name)->type->isNullable() && !required_column_type->isNullable();
convert_null_to_default = null_as_default && isNullableOrLowCardinalityNullable(block.findByName(required_column_name)->type) && !isNullableOrLowCardinalityNullable(required_column_type);
if ((is_column_in_query && !convert_null_to_default) || added_columns.contains(required_column_name))
return;
@ -99,8 +99,14 @@ void addDefaultRequiredExpressionsRecursively(
/// This column is required, but doesn't have default expression, so lets use "default default"
auto column = columns.get(required_column_name);
auto default_value = column.type->getDefault();
auto default_ast = std::make_shared<ASTLiteral>(default_value);
default_expr_list_accum->children.emplace_back(setAlias(default_ast, required_column_name));
ASTPtr expr = std::make_shared<ASTLiteral>(default_value);
if (is_column_in_query && convert_null_to_default)
{
/// We should CAST default value to required type, otherwise the result of ifNull function can be different type.
auto cast_expr = makeASTFunction("_CAST", std::move(expr), std::make_shared<ASTLiteral>(columns.get(required_column_name).type->getName()));
expr = makeASTFunction("ifNull", std::make_shared<ASTIdentifier>(required_column_name), std::move(cast_expr));
}
default_expr_list_accum->children.emplace_back(setAlias(expr, required_column_name));
added_columns.emplace(required_column_name);
}
}
@ -173,6 +179,16 @@ void performRequiredConversions(Block & block, const NamesAndTypesList & require
}
}
/// Checks whether at least one required column needs its NULL values replaced by defaults:
/// the column is known to `columns`, its required type is neither Nullable nor LowCardinality(Nullable),
/// and the same-named column in `header` has a Nullable or LowCardinality(Nullable) type.
///
/// Required columns that are absent from `header` are skipped: they carry no incoming NULLs to convert.
bool needConvertAnyNullToDefault(const Block & header, const NamesAndTypesList & required_columns, const ColumnsDescription & columns)
{
    for (const auto & required_column : required_columns)
    {
        if (!columns.has(required_column.name) || isNullableOrLowCardinalityNullable(required_column.type))
            continue;

        /// The column being described in `columns` does not guarantee that it is present in `header`,
        /// so the lookup result must be checked before it is dereferenced.
        const auto * header_column = header.findByName(required_column.name);
        if (header_column && isNullableOrLowCardinalityNullable(header_column->type))
            return true;
    }

    return false;
}
ActionsDAGPtr evaluateMissingDefaults(
const Block & header,
const NamesAndTypesList & required_columns,
@ -181,7 +197,7 @@ ActionsDAGPtr evaluateMissingDefaults(
bool save_unneeded_columns,
bool null_as_default)
{
if (!columns.hasDefaults())
if (!columns.hasDefaults() && (!null_as_default || !needConvertAnyNullToDefault(header, required_columns, columns)))
return nullptr;
ASTPtr expr_list = defaultRequiredExpressions(header, required_columns, columns, null_as_default);

View File

@ -293,7 +293,7 @@ bool CSVFormatReader::readField(
return false;
}
if (format_settings.null_as_default && !type->isNullable() && !type->isLowCardinalityNullable())
if (format_settings.null_as_default && !isNullableOrLowCardinalityNullable(type))
{
/// If value is null but type is not nullable then use default value instead.
return SerializationNullable::deserializeTextCSVImpl(column, *buf, format_settings, serialization);

View File

@ -389,7 +389,7 @@ bool MySQLDumpRowInputFormat::readField(IColumn & column, size_t column_idx)
{
const auto & type = types[column_idx];
const auto & serialization = serializations[column_idx];
if (format_settings.null_as_default && !type->isNullable() && !type->isLowCardinalityNullable())
if (format_settings.null_as_default && !isNullableOrLowCardinalityNullable(type))
return SerializationNullable::deserializeTextQuotedImpl(column, *in, format_settings, serialization);
serialization->deserializeTextQuoted(column, *in, format_settings);

View File

@ -146,7 +146,7 @@ bool TSKVRowInputFormat::readRow(MutableColumns & columns, RowReadExtension & ex
seen_columns[index] = read_columns[index] = true;
const auto & type = getPort().getHeader().getByPosition(index).type;
const auto & serialization = serializations[index];
if (format_settings.null_as_default && !type->isNullable() && !type->isLowCardinalityNullable())
if (format_settings.null_as_default && !isNullableOrLowCardinalityNullable(type))
read_columns[index] = SerializationNullable::deserializeTextEscapedImpl(*columns[index], *in, format_settings, serialization);
else
serialization->deserializeTextEscaped(*columns[index], *in, format_settings);

View File

@ -156,7 +156,7 @@ bool TabSeparatedFormatReader::readField(IColumn & column, const DataTypePtr & t
return false;
}
bool as_nullable = format_settings.null_as_default && !type->isNullable() && !type->isLowCardinalityNullable();
bool as_nullable = format_settings.null_as_default && !isNullableOrLowCardinalityNullable(type);
if (is_raw)
{
@ -242,7 +242,7 @@ bool TabSeparatedFormatReader::parseRowEndWithDiagnosticInfo(WriteBuffer & out)
void TabSeparatedFormatReader::checkNullValueForNonNullable(DataTypePtr type)
{
bool can_be_parsed_as_null = type->isNullable() || type->isLowCardinalityNullable() || format_settings.null_as_default;
bool can_be_parsed_as_null = isNullableOrLowCardinalityNullable(type) || format_settings.null_as_default;
// check null value for type is not nullable. don't cross buffer bound for simplicity, so maybe missing some case
if (!can_be_parsed_as_null && !buf->eof())

View File

@ -272,7 +272,7 @@ bool ValuesBlockInputFormat::tryReadValue(IColumn & column, size_t column_idx)
{
const auto & type = types[column_idx];
const auto & serialization = serializations[column_idx];
if (format_settings.null_as_default && !type->isNullable() && !type->isLowCardinalityNullable())
if (format_settings.null_as_default && !isNullableOrLowCardinalityNullable(type))
read = SerializationNullable::deserializeTextQuotedImpl(column, *buf, format_settings, serialization);
else
serialization->deserializeTextQuoted(column, *buf, format_settings);

View File

@ -740,7 +740,7 @@ void bloomFilterIndexValidator(const IndexDescription & index, bool /*attach*/)
const auto & array_type = assert_cast<const DataTypeArray &>(*index_data_type);
data_type = WhichDataType(array_type.getNestedType());
}
else if (data_type.isLowCarnality())
else if (data_type.isLowCardinality())
{
const auto & low_cardinality = assert_cast<const DataTypeLowCardinality &>(*index_data_type);
data_type = WhichDataType(low_cardinality.getDictionaryType());

View File

@ -747,7 +747,7 @@ void invertedIndexValidator(const IndexDescription & index, bool /*attach*/)
const auto & gin_type = assert_cast<const DataTypeArray &>(*index_data_type);
data_type = WhichDataType(gin_type.getNestedType());
}
else if (data_type.isLowCarnality())
else if (data_type.isLowCardinality())
{
const auto & low_cardinality = assert_cast<const DataTypeLowCardinality &>(*index_data_type);
data_type = WhichDataType(low_cardinality.getDictionaryType());

View File

@ -10,7 +10,7 @@ ${CLICKHOUSE_CLIENT} --query "DROP TABLE IF EXISTS test_00690;"
${CLICKHOUSE_CLIENT} --query "CREATE TABLE test_00690 (val Int64) engine = Memory;"
${CLICKHOUSE_CLIENT} --query "INSERT INTO test_00690 SELECT 1;"
${CLICKHOUSE_CLIENT} --query "INSERT INTO test_00690 SELECT NULL AS src;" 2>&1 | grep -oF 'while converting source column src to destination column val';
${CLICKHOUSE_CLIENT} --query "INSERT INTO test_00690 SELECT number % 2 ? 1 : NULL AS src FROM numbers(10);" 2>&1 | grep -oF 'while converting source column src to destination column val';
${CLICKHOUSE_CLIENT} --query "INSERT INTO test_00690 SELECT NULL AS src SETTINGS insert_null_as_default=0;" 2>&1 | grep -oF 'while converting source column src to destination column val';
${CLICKHOUSE_CLIENT} --query "INSERT INTO test_00690 SELECT number % 2 ? 1 : NULL AS src FROM numbers(10) SETTINGS insert_null_as_default=0;" 2>&1 | grep -oF 'while converting source column src to destination column val';
${CLICKHOUSE_CLIENT} --query "DROP TABLE test_00690;"

View File

@ -0,0 +1,4 @@
0
42
Hello
World

View File

@ -0,0 +1,12 @@
-- Checks insert_null_as_default for INSERT ... SELECT with nullable source columns.

-- Case 1: the destination column has no DEFAULT expression.
-- The SELECT produces NULL for odd `number` and 42 for even `number`.
drop table if exists test;
create table test (x UInt64) engine=Memory();
set insert_null_as_default=1;
insert into test select number % 2 ? NULL : 42 as x from numbers(2);
select * from test order by x;
drop table test;
-- Case 2: LowCardinality destination column with an explicit DEFAULT 'Hello';
-- the source expression is LowCardinality(Nullable(String)) and yields NULL for odd `number`.
create table test (x LowCardinality(String) default 'Hello') engine=Memory();
insert into test select (number % 2 ? NULL : 'World')::LowCardinality(Nullable(String)) from numbers(2);
select * from test order by x;
drop table test;