diff --git a/docker/server/entrypoint.sh b/docker/server/entrypoint.sh index 79e809ea7f1..3102ab8297c 100755 --- a/docker/server/entrypoint.sh +++ b/docker/server/entrypoint.sh @@ -109,7 +109,7 @@ if [ -n "$CLICKHOUSE_USER" ] && [ "$CLICKHOUSE_USER" != "default" ] || [ -n "$CL ::/0 - ${CLICKHOUSE_PASSWORD} + /]]]]>}]]> default ${CLICKHOUSE_ACCESS_MANAGEMENT} diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index df96b8129f1..4cf8b8bd1c5 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -1396,6 +1396,7 @@ SELECT * FROM json_each_row_nested - [input_format_json_ignore_unknown_keys_in_named_tuple](/docs/en/operations/settings/settings-formats.md/#input_format_json_ignore_unknown_keys_in_named_tuple) - ignore unknown keys in json object for named tuples. Default value - `false`. - [input_format_json_compact_allow_variable_number_of_columns](/docs/en/operations/settings/settings-formats.md/#input_format_json_compact_allow_variable_number_of_columns) - allow variable number of columns in JSONCompact/JSONCompactEachRow format, ignore extra columns and use default values on missing columns. Default value - `false`. - [input_format_json_throw_on_bad_escape_sequence](/docs/en/operations/settings/settings-formats.md/#input_format_json_throw_on_bad_escape_sequence) - throw an exception if JSON string contains bad escape sequence. If disabled, bad escape sequences will remain as is in the data. Default value - `true`. +- [input_format_json_empty_as_default](/docs/en/operations/settings/settings-formats.md/#input_format_json_empty_as_default) - treat empty fields in JSON input as default values. Default value - `false`. For complex default expressions [input_format_defaults_for_omitted_fields](/docs/en/operations/settings/settings-formats.md/#input_format_defaults_for_omitted_fields) must be enabled too. - [output_format_json_quote_64bit_integers](/docs/en/operations/settings/settings-formats.md/#output_format_json_quote_64bit_integers) - controls quoting of 64-bit integers in JSON output format. Default value - `true`. - [output_format_json_quote_64bit_floats](/docs/en/operations/settings/settings-formats.md/#output_format_json_quote_64bit_floats) - controls quoting of 64-bit floats in JSON output format. Default value - `false`. - [output_format_json_quote_denormals](/docs/en/operations/settings/settings-formats.md/#output_format_json_quote_denormals) - enables '+nan', '-nan', '+inf', '-inf' outputs in JSON output format. Default value - `false`. diff --git a/docs/en/operations/settings/settings-formats.md b/docs/en/operations/settings/settings-formats.md index 5aad8db2809..c012d065574 100644 --- a/docs/en/operations/settings/settings-formats.md +++ b/docs/en/operations/settings/settings-formats.md @@ -752,6 +752,17 @@ Possible values: Default value: 0. +### input_format_json_empty_as_default {#input_format_json_empty_as_default} + +When enabled, replace empty input fields in JSON with default values. For complex default expressions `input_format_defaults_for_omitted_fields` must be enabled too. + +Possible values: + ++ 0 — Disable. ++ 1 — Enable. + +Default value: 0. + ## TSV format settings {#tsv-format-settings} ### input_format_tsv_empty_as_default {#input_format_tsv_empty_as_default} diff --git a/src/Common/atomicRename.cpp b/src/Common/atomicRename.cpp index 4acdff5f66c..7d181d72154 100644 --- a/src/Common/atomicRename.cpp +++ b/src/Common/atomicRename.cpp @@ -57,11 +57,13 @@ namespace ErrorCodes namespace DB { -static bool supportsAtomicRenameImpl() +static std::optional supportsAtomicRenameImpl() { VersionNumber renameat2_minimal_version(3, 15, 0); VersionNumber linux_version(Poco::Environment::osVersion()); - return linux_version >= renameat2_minimal_version; + if (linux_version >= renameat2_minimal_version) + return std::nullopt; + return fmt::format("Linux kernel 3.15+ is required, got {}", linux_version.toString()); } static bool renameat2(const std::string & old_path, const std::string & new_path, int flags) @@ -97,10 +99,14 @@ static bool renameat2(const std::string & old_path, const std::string & new_path ErrnoException::throwFromPath(ErrorCodes::SYSTEM_ERROR, new_path, "Cannot rename {} to {}", old_path, new_path); } -bool supportsAtomicRename() +bool supportsAtomicRename(std::string * out_message) { - static bool supports = supportsAtomicRenameImpl(); - return supports; + static auto error = supportsAtomicRenameImpl(); + if (!error.has_value()) + return true; + if (out_message) + *out_message = error.value(); + return false; } } @@ -152,16 +158,22 @@ static bool renameat2(const std::string & old_path, const std::string & new_path } -static bool supportsAtomicRenameImpl() +static std::optional supportsAtomicRenameImpl() { auto fun = dlsym(RTLD_DEFAULT, "renamex_np"); - return fun != nullptr; + if (fun != nullptr) + return std::nullopt; + return "macOS 10.12 or later is required"; } -bool supportsAtomicRename() +bool supportsAtomicRename(std::string * out_message) { - static bool supports = supportsAtomicRenameImpl(); - return supports; + static auto error = supportsAtomicRenameImpl(); + if (!error.has_value()) + return true; + if (out_message) + *out_message = error.value(); + return false; } } @@ -179,8 +191,10 @@ static bool renameat2(const std::string &, const std::string &, int) return false; } -bool supportsAtomicRename() +bool supportsAtomicRename(std::string * out_message) { + if (out_message) + *out_message = "only Linux and macOS are supported"; return false; } diff --git a/src/Common/atomicRename.h b/src/Common/atomicRename.h index 6da8a8f623b..96d0d6d1e5a 100644 --- a/src/Common/atomicRename.h +++ b/src/Common/atomicRename.h @@ -6,7 +6,7 @@ namespace DB { /// Returns true, if the following functions supported by the system -bool supportsAtomicRename(); +bool supportsAtomicRename(std::string * out_message = nullptr); /// Atomically rename old_path to new_path. If new_path exists, do not overwrite it and throw exception void renameNoReplace(const std::string & old_path, const std::string & new_path); diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 3a2c7b12b13..1cd977f6725 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -1144,6 +1144,7 @@ class IColumn; M(Bool, input_format_try_infer_variants, false, "Try to infer the Variant type in text formats when there is more than one possible type for column/array elements", 0) \ M(Bool, type_json_skip_duplicated_paths, false, "When enabled, during parsing JSON object into JSON type duplicated paths will be ignored and only the first one will be inserted instead of an exception", 0) \ M(UInt64, input_format_json_max_depth, 1000, "Maximum depth of a field in JSON. This is not a strict limit, it does not have to be applied precisely.", 0) \ + M(Bool, input_format_json_empty_as_default, false, "Treat empty fields in JSON input as default values.", 0) \ M(Bool, input_format_try_infer_integers, true, "Try to infer integers instead of floats while schema inference in text formats", 0) \ M(Bool, input_format_try_infer_dates, true, "Try to infer dates from string fields while schema inference in text formats", 0) \ M(Bool, input_format_try_infer_datetimes, true, "Try to infer datetimes from string fields while schema inference in text formats", 0) \ diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index 6bd354ce05b..eb47c221c3d 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -71,6 +71,7 @@ static std::initializer_list #include +#include namespace DB { @@ -615,28 +616,49 @@ void SerializationArray::serializeTextJSONPretty(const IColumn & column, size_t } -void SerializationArray::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const +template +ReturnType SerializationArray::deserializeTextJSONImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { - deserializeTextImpl(column, istr, - [&](IColumn & nested_column) + auto deserialize_nested = [&settings, this](IColumn & nested_column, ReadBuffer & buf) -> ReturnType + { + if constexpr (std::is_same_v) { if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(nested_column)) - SerializationNullable::deserializeNullAsDefaultOrNestedTextJSON(nested_column, istr, settings, nested); + SerializationNullable::deserializeNullAsDefaultOrNestedTextJSON(nested_column, buf, settings, nested); else - nested->deserializeTextJSON(nested_column, istr, settings); - }, false); + nested->deserializeTextJSON(nested_column, buf, settings); + } + else + { + if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(nested_column)) + return SerializationNullable::tryDeserializeNullAsDefaultOrNestedTextJSON(nested_column, buf, settings, nested); + return nested->tryDeserializeTextJSON(nested_column, buf, settings); + } + }; + + if (settings.json.empty_as_default) + return deserializeTextImpl(column, istr, + [&deserialize_nested, &istr](IColumn & nested_column) -> ReturnType + { + return JSONUtils::deserializeEmpyStringAsDefaultOrNested(nested_column, istr, deserialize_nested); + }, false); + else + return deserializeTextImpl(column, istr, + [&deserialize_nested, &istr](IColumn & nested_column) -> ReturnType + { + return deserialize_nested(nested_column, istr); + }, false); +} + + +void SerializationArray::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const +{ + deserializeTextJSONImpl(column, istr, settings); } bool SerializationArray::tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { - auto read_nested = [&](IColumn & nested_column) - { - if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(nested_column)) - return SerializationNullable::tryDeserializeNullAsDefaultOrNestedTextJSON(nested_column, istr, settings, nested); - return nested->tryDeserializeTextJSON(nested_column, istr, settings); - }; - - return deserializeTextImpl(column, istr, std::move(read_nested), false); + return deserializeTextJSONImpl(column, istr, settings); } diff --git a/src/DataTypes/Serializations/SerializationArray.h b/src/DataTypes/Serializations/SerializationArray.h index c3353f0c251..7e34abfac90 100644 --- a/src/DataTypes/Serializations/SerializationArray.h +++ b/src/DataTypes/Serializations/SerializationArray.h @@ -82,6 +82,10 @@ public: SerializationPtr create(const SerializationPtr & prev) const override; ColumnPtr create(const ColumnPtr & prev) const override; }; + +private: + template + ReturnType deserializeTextJSONImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const; }; } diff --git a/src/DataTypes/Serializations/SerializationMap.cpp b/src/DataTypes/Serializations/SerializationMap.cpp index c722b3ac7a1..ae864cbf7b4 100644 --- a/src/DataTypes/Serializations/SerializationMap.cpp +++ b/src/DataTypes/Serializations/SerializationMap.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -316,28 +317,52 @@ void SerializationMap::serializeTextJSONPretty(const IColumn & column, size_t ro } -void SerializationMap::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const +template +ReturnType SerializationMap::deserializeTextJSONImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { - deserializeTextImpl(column, istr, - [&settings](ReadBuffer & buf, const SerializationPtr & subcolumn_serialization, IColumn & subcolumn) + auto deserialize_nested = [&settings](IColumn & subcolumn, ReadBuffer & buf, const SerializationPtr & subcolumn_serialization) -> ReturnType + { + if constexpr (std::is_same_v) { if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(subcolumn)) SerializationNullable::deserializeNullAsDefaultOrNestedTextJSON(subcolumn, buf, settings, subcolumn_serialization); else subcolumn_serialization->deserializeTextJSON(subcolumn, buf, settings); - }); + } + else + { + if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(subcolumn)) + return SerializationNullable::tryDeserializeNullAsDefaultOrNestedTextJSON(subcolumn, buf, settings, subcolumn_serialization); + return subcolumn_serialization->tryDeserializeTextJSON(subcolumn, buf, settings); + } + }; + + if (settings.json.empty_as_default) + return deserializeTextImpl(column, istr, + [&deserialize_nested](ReadBuffer & buf, const SerializationPtr & subcolumn_serialization, IColumn & subcolumn) -> ReturnType + { + return JSONUtils::deserializeEmpyStringAsDefaultOrNested(subcolumn, buf, + [&deserialize_nested, &subcolumn_serialization](IColumn & subcolumn_, ReadBuffer & buf_) -> ReturnType + { + return deserialize_nested(subcolumn_, buf_, subcolumn_serialization); + }); + }); + else + return deserializeTextImpl(column, istr, + [&deserialize_nested](ReadBuffer & buf, const SerializationPtr & subcolumn_serialization, IColumn & subcolumn) -> ReturnType + { + return deserialize_nested(subcolumn, buf, subcolumn_serialization); + }); +} + +void SerializationMap::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const +{ + deserializeTextJSONImpl(column, istr, settings); } bool SerializationMap::tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { - auto reader = [&settings](ReadBuffer & buf, const SerializationPtr & subcolumn_serialization, IColumn & subcolumn) - { - if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(subcolumn)) - return SerializationNullable::tryDeserializeNullAsDefaultOrNestedTextJSON(subcolumn, buf, settings, subcolumn_serialization); - return subcolumn_serialization->tryDeserializeTextJSON(subcolumn, buf, settings); - }; - - return deserializeTextImpl(column, istr, reader); + return deserializeTextJSONImpl(column, istr, settings); } void SerializationMap::serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const diff --git a/src/DataTypes/Serializations/SerializationMap.h b/src/DataTypes/Serializations/SerializationMap.h index cfcde445c1f..007d153ec7e 100644 --- a/src/DataTypes/Serializations/SerializationMap.h +++ b/src/DataTypes/Serializations/SerializationMap.h @@ -74,6 +74,9 @@ private: template ReturnType deserializeTextImpl(IColumn & column, ReadBuffer & istr, Reader && reader) const; + + template + ReturnType deserializeTextJSONImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const; }; } diff --git a/src/DataTypes/Serializations/SerializationTuple.cpp b/src/DataTypes/Serializations/SerializationTuple.cpp index 594a23ab507..e1fcb1a8d48 100644 --- a/src/DataTypes/Serializations/SerializationTuple.cpp +++ b/src/DataTypes/Serializations/SerializationTuple.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -313,27 +314,9 @@ void SerializationTuple::serializeTextJSONPretty(const IColumn & column, size_t } template -ReturnType SerializationTuple::deserializeTextJSONImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const +ReturnType SerializationTuple::deserializeTupleJSONImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, auto && deserialize_element) const { - static constexpr bool throw_exception = std::is_same_v; - - auto deserialize_element = [&](IColumn & element_column, size_t element_pos) - { - if constexpr (throw_exception) - { - if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(element_column)) - SerializationNullable::deserializeNullAsDefaultOrNestedTextJSON(element_column, istr, settings, elems[element_pos]); - else - elems[element_pos]->deserializeTextJSON(element_column, istr, settings); - return true; - } - else - { - if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(element_column)) - return SerializationNullable::tryDeserializeNullAsDefaultOrNestedTextJSON(element_column, istr, settings, elems[element_pos]); - return elems[element_pos]->tryDeserializeTextJSON(element_column, istr, settings); - } - }; + static constexpr auto throw_exception = std::is_same_v; if (settings.json.read_named_tuples_as_objects && have_explicit_names) @@ -506,12 +489,51 @@ ReturnType SerializationTuple::deserializeTextJSONImpl(IColumn & column, ReadBuf } } -void SerializationTuple::deserializeTextJSON(DB::IColumn & column, DB::ReadBuffer & istr, const DB::FormatSettings & settings) const +template +ReturnType SerializationTuple::deserializeTextJSONImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { - deserializeTextJSONImpl(column, istr, settings); + auto deserialize_nested = [&settings](IColumn & nested_column, ReadBuffer & buf, const SerializationPtr & nested_column_serialization) -> ReturnType + { + if constexpr (std::is_same_v) + { + if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(nested_column)) + SerializationNullable::deserializeNullAsDefaultOrNestedTextJSON(nested_column, buf, settings, nested_column_serialization); + else + nested_column_serialization->deserializeTextJSON(nested_column, buf, settings); + } + else + { + if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(nested_column)) + return SerializationNullable::tryDeserializeNullAsDefaultOrNestedTextJSON(nested_column, buf, settings, nested_column_serialization); + else + return nested_column_serialization->tryDeserializeTextJSON(nested_column, buf, settings); + } + }; + + if (settings.json.empty_as_default) + return deserializeTupleJSONImpl(column, istr, settings, + [&deserialize_nested, &istr, this](IColumn & nested_column, size_t element_pos) -> ReturnType + { + return JSONUtils::deserializeEmpyStringAsDefaultOrNested(nested_column, istr, + [&deserialize_nested, element_pos, this](IColumn & nested_column_, ReadBuffer & buf) -> ReturnType + { + return deserialize_nested(nested_column_, buf, elems[element_pos]); + }); + }); + else + return deserializeTupleJSONImpl(column, istr, settings, + [&deserialize_nested, &istr, this](IColumn & nested_column, size_t element_pos) -> ReturnType + { + return deserialize_nested(nested_column, istr, elems[element_pos]); + }); } -bool SerializationTuple::tryDeserializeTextJSON(DB::IColumn & column, DB::ReadBuffer & istr, const DB::FormatSettings & settings) const +void SerializationTuple::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const +{ + deserializeTextJSONImpl(column, istr, settings); +} + +bool SerializationTuple::tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { return deserializeTextJSONImpl(column, istr, settings); } diff --git a/src/DataTypes/Serializations/SerializationTuple.h b/src/DataTypes/Serializations/SerializationTuple.h index 810673d8b21..c51adb6e536 100644 --- a/src/DataTypes/Serializations/SerializationTuple.h +++ b/src/DataTypes/Serializations/SerializationTuple.h @@ -81,7 +81,10 @@ private: template ReturnType deserializeTextImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, bool whole) const; - template + template + ReturnType deserializeTupleJSONImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, auto && deserialize_element) const; + + template ReturnType deserializeTextJSONImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const; template diff --git a/src/Databases/DatabaseAtomic.cpp b/src/Databases/DatabaseAtomic.cpp index d86e29ca915..e2e2414b1ca 100644 --- a/src/Databases/DatabaseAtomic.cpp +++ b/src/Databases/DatabaseAtomic.cpp @@ -197,8 +197,9 @@ void DatabaseAtomic::renameTable(ContextPtr local_context, const String & table_ throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Moving tables between databases of different engines is not supported"); } - if (exchange && !supportsAtomicRename()) - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "RENAME EXCHANGE is not supported"); + std::string message; + if (exchange && !supportsAtomicRename(&message)) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "RENAME EXCHANGE is not supported because exchanging files is not supported by the OS ({})", message); waitDatabaseStarted(); diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index f9eb586d647..5a7ed523192 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -152,6 +152,7 @@ FormatSettings getFormatSettings(const ContextPtr & context, const Settings & se format_settings.json.try_infer_objects_as_tuples = settings.input_format_json_try_infer_named_tuples_from_objects; format_settings.json.throw_on_bad_escape_sequence = settings.input_format_json_throw_on_bad_escape_sequence; format_settings.json.ignore_unnecessary_fields = settings.input_format_json_ignore_unnecessary_fields; + format_settings.json.empty_as_default = settings.input_format_json_empty_as_default; format_settings.json.type_json_skip_duplicated_paths = settings.type_json_skip_duplicated_paths; format_settings.null_as_default = settings.input_format_null_as_default; format_settings.force_null_for_omitted_fields = settings.input_format_force_null_for_omitted_fields; diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index 8b489812662..00b32ae172f 100644 --- a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -237,6 +237,7 @@ struct FormatSettings bool infer_incomplete_types_as_strings = true; bool throw_on_bad_escape_sequence = true; bool ignore_unnecessary_fields = true; + bool empty_as_default = false; bool type_json_skip_duplicated_paths = false; } json{}; diff --git a/src/Formats/JSONUtils.cpp b/src/Formats/JSONUtils.cpp index 123f2e4f608..e4d43140ca0 100644 --- a/src/Formats/JSONUtils.cpp +++ b/src/Formats/JSONUtils.cpp @@ -2,12 +2,14 @@ #include #include #include +#include #include #include #include #include #include #include +#include #include @@ -286,11 +288,19 @@ namespace JSONUtils return true; } - if (as_nullable) - return SerializationNullable::deserializeNullAsDefaultOrNestedTextJSON(column, in, format_settings, serialization); + auto deserialize = [as_nullable, &format_settings, &serialization](IColumn & column_, ReadBuffer & buf) -> bool + { + if (as_nullable) + return SerializationNullable::deserializeNullAsDefaultOrNestedTextJSON(column_, buf, format_settings, serialization); - serialization->deserializeTextJSON(column, in, format_settings); - return true; + serialization->deserializeTextJSON(column_, buf, format_settings); + return true; + }; + + if (format_settings.json.empty_as_default) + return JSONUtils::deserializeEmpyStringAsDefaultOrNested(column, in, deserialize); + else + return deserialize(column, in); } catch (Exception & e) { @@ -920,6 +930,78 @@ namespace JSONUtils } } + template + ReturnType deserializeEmpyStringAsDefaultOrNested(IColumn & column, ReadBuffer & istr, const NestedDeserialize & deserialize_nested) + { + static constexpr auto throw_exception = std::is_same_v; + + static constexpr auto EMPTY_STRING = "\"\""; + static constexpr auto EMPTY_STRING_LENGTH = std::string_view(EMPTY_STRING).length(); + + if (istr.eof() || *istr.position() != EMPTY_STRING[0]) + return deserialize_nested(column, istr); + + auto do_deserialize = [](IColumn & column_, ReadBuffer & buf, auto && check_for_empty_string, auto && deserialize) -> ReturnType + { + if (check_for_empty_string(buf)) + { + column_.insertDefault(); + return ReturnType(default_column_return_value); + } + return deserialize(column_, buf); + }; + + if (istr.available() >= EMPTY_STRING_LENGTH) + { + /// We have enough data in buffer to check if we have an empty string. + auto check_for_empty_string = [](ReadBuffer & buf) -> bool + { + auto * pos = buf.position(); + if (checkString(EMPTY_STRING, buf)) + return true; + buf.position() = pos; + return false; + }; + + return do_deserialize(column, istr, check_for_empty_string, deserialize_nested); + } + + /// We don't have enough data in buffer to check if we have an empty string. + /// Use PeekableReadBuffer to make a checkpoint before checking for an + /// empty string and rollback if check was failed. + + auto check_for_empty_string = [](ReadBuffer & buf) -> bool + { + auto & peekable_buf = assert_cast(buf); + peekable_buf.setCheckpoint(); + SCOPE_EXIT(peekable_buf.dropCheckpoint()); + if (checkString(EMPTY_STRING, peekable_buf)) + return true; + peekable_buf.rollbackToCheckpoint(); + return false; + }; + + auto deserialize_nested_with_check = [&deserialize_nested](IColumn & column_, ReadBuffer & buf) -> ReturnType + { + auto & peekable_buf = assert_cast(buf); + if constexpr (throw_exception) + deserialize_nested(column_, peekable_buf); + else if (!deserialize_nested(column_, peekable_buf)) + return ReturnType(false); + + if (unlikely(peekable_buf.hasUnreadData())) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Incorrect state while parsing JSON: PeekableReadBuffer has unread data in own memory: {}", String(peekable_buf.position(), peekable_buf.available())); + + return ReturnType(true); + }; + + PeekableReadBuffer peekable_buf(istr, true); + return do_deserialize(column, peekable_buf, check_for_empty_string, deserialize_nested_with_check); + } + + template void deserializeEmpyStringAsDefaultOrNested(IColumn & column, ReadBuffer & istr, const NestedDeserialize & deserialize_nested); + template bool deserializeEmpyStringAsDefaultOrNested(IColumn & column, ReadBuffer & istr, const NestedDeserialize & deserialize_nested); + template bool deserializeEmpyStringAsDefaultOrNested(IColumn & column, ReadBuffer & istr, const NestedDeserialize & deserialize_nested); } } diff --git a/src/Formats/JSONUtils.h b/src/Formats/JSONUtils.h index 622703947b9..492da52eb7e 100644 --- a/src/Formats/JSONUtils.h +++ b/src/Formats/JSONUtils.h @@ -8,6 +8,7 @@ #include #include #include +#include #include namespace DB @@ -146,6 +147,16 @@ namespace JSONUtils bool skipUntilFieldInObject(ReadBuffer & in, const String & desired_field_name, const FormatSettings::JSON & settings); void skipTheRestOfObject(ReadBuffer & in, const FormatSettings::JSON & settings); + + template + using NestedDeserialize = std::function; + + template + ReturnType deserializeEmpyStringAsDefaultOrNested(IColumn & column, ReadBuffer & istr, const NestedDeserialize & deserialize_nested); + + extern template void deserializeEmpyStringAsDefaultOrNested(IColumn & column, ReadBuffer & istr, const NestedDeserialize & deserialize_nested); + extern template bool deserializeEmpyStringAsDefaultOrNested(IColumn & column, ReadBuffer & istr, const NestedDeserialize & deserialize_nested); + extern template bool deserializeEmpyStringAsDefaultOrNested(IColumn & column, ReadBuffer & istr, const NestedDeserialize & deserialize_nested); } } diff --git a/src/Interpreters/ConcurrentHashJoin.cpp b/src/Interpreters/ConcurrentHashJoin.cpp index ac940c62a1a..d906540d6df 100644 --- a/src/Interpreters/ConcurrentHashJoin.cpp +++ b/src/Interpreters/ConcurrentHashJoin.cpp @@ -85,7 +85,9 @@ ConcurrentHashJoin::ConcurrentHashJoin( CurrentMetrics::ConcurrentHashJoinPoolThreads, CurrentMetrics::ConcurrentHashJoinPoolThreadsActive, CurrentMetrics::ConcurrentHashJoinPoolThreadsScheduled, - slots)) + /*max_threads_*/ slots, + /*max_free_threads_*/ 0, + /*queue_size_*/ slots)) , stats_collecting_params(stats_collecting_params_) { hash_joins.resize(slots); diff --git a/src/Interpreters/HashJoin/HashJoin.cpp b/src/Interpreters/HashJoin/HashJoin.cpp index 1b8b45b94ea..230e4cd9691 100644 --- a/src/Interpreters/HashJoin/HashJoin.cpp +++ b/src/Interpreters/HashJoin/HashJoin.cpp @@ -1236,6 +1236,7 @@ IBlocksStreamPtr HashJoin::getNonJoinedBlocks(const Block & left_sample_block, void HashJoin::reuseJoinedData(const HashJoin & join) { + have_compressed = join.have_compressed; data = join.data; from_storage_join = true; diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 40153ea77a2..d949f0394bc 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -968,6 +968,11 @@ void InterpreterCreateQuery::validateMaterializedViewColumnsAndEngine(const ASTC if (database && database->getEngineName() != "Atomic") throw Exception(ErrorCodes::INCORRECT_QUERY, "Refreshable materialized views (except with APPEND) only support Atomic database engine, but database {} has engine {}", create.getDatabase(), database->getEngineName()); + + std::string message; + if (!supportsAtomicRename(&message)) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, + "Can't create refreshable materialized view because exchanging files is not supported by the OS ({})", message); } Block input_block; diff --git a/src/Parsers/ParserShowColumnsQuery.cpp b/src/Parsers/ParserShowColumnsQuery.cpp index 5d26d7bf1d4..9c31786ad57 100644 --- a/src/Parsers/ParserShowColumnsQuery.cpp +++ b/src/Parsers/ParserShowColumnsQuery.cpp @@ -1,6 +1,6 @@ #include -#include +#include #include #include #include @@ -18,7 +18,6 @@ bool ParserShowColumnsQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expe ASTPtr from1; ASTPtr from2; - String from1_str; String from2_str; auto query = std::make_shared(); @@ -43,25 +42,18 @@ bool ParserShowColumnsQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expe else return false; - tryGetIdentifierNameInto(from1, from1_str); - - bool abbreviated_form = from1_str.contains("."); // FROM database.table - if (abbreviated_form) - { - std::vector split; - boost::split(split, from1_str, boost::is_any_of(".")); - query->database = split[0]; - query->table = split[1]; - } + const auto * table_id = from1->as(); + if (!table_id) + return false; + query->table = table_id->shortName(); + if (table_id->compound()) + query->database = table_id->name_parts[0]; else { if (ParserKeyword(Keyword::FROM).ignore(pos, expected) || ParserKeyword(Keyword::IN).ignore(pos, expected)) if (!ParserIdentifier().parse(pos, from2, expected)) return false; - tryGetIdentifierNameInto(from2, from2_str); - - query->table = from1_str; query->database = from2_str; } diff --git a/src/Parsers/ParserShowIndexesQuery.cpp b/src/Parsers/ParserShowIndexesQuery.cpp index 495dfc5101f..6469d74b016 100644 --- a/src/Parsers/ParserShowIndexesQuery.cpp +++ b/src/Parsers/ParserShowIndexesQuery.cpp @@ -1,6 +1,6 @@ #include -#include +#include #include #include #include @@ -17,7 +17,6 @@ bool ParserShowIndexesQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expe ASTPtr from1; ASTPtr from2; - String from1_str; String from2_str; auto query = std::make_shared(); @@ -39,25 +38,18 @@ bool ParserShowIndexesQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expe else return false; - tryGetIdentifierNameInto(from1, from1_str); - - bool abbreviated_form = from1_str.contains("."); // FROM database.table - if (abbreviated_form) - { - std::vector split; - boost::split(split, from1_str, boost::is_any_of(".")); - query->database = split[0]; - query->table = split[1]; - } + const auto * table_id = from1->as(); + if (!table_id) + return false; + query->table = table_id->shortName(); + if (table_id->compound()) + query->database = table_id->name_parts[0]; else { if (ParserKeyword(Keyword::FROM).ignore(pos, expected) || ParserKeyword(Keyword::IN).ignore(pos, expected)) if (!ParserIdentifier().parse(pos, from2, expected)) return false; - tryGetIdentifierNameInto(from2, from2_str); - - query->table = from1_str; query->database = from2_str; } diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeAttachThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeAttachThread.cpp index 6e22a3515bc..67570d78366 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeAttachThread.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeAttachThread.cpp @@ -15,6 +15,7 @@ namespace ErrorCodes { extern const int SUPPORT_IS_DISABLED; extern const int REPLICA_STATUS_CHANGED; + extern const int LOGICAL_ERROR; } ReplicatedMergeTreeAttachThread::ReplicatedMergeTreeAttachThread(StorageReplicatedMergeTree & storage_) @@ -117,6 +118,67 @@ void ReplicatedMergeTreeAttachThread::checkHasReplicaMetadataInZooKeeper(const z } } +Int32 ReplicatedMergeTreeAttachThread::fixReplicaMetadataVersionIfNeeded(zkutil::ZooKeeperPtr zookeeper) +{ + const String & zookeeper_path = storage.zookeeper_path; + const String & replica_path = storage.replica_path; + const bool replica_readonly = storage.is_readonly; + + for (size_t i = 0; i != 2; ++i) + { + String replica_metadata_version_str; + const bool replica_metadata_version_exists = zookeeper->tryGet(replica_path + "/metadata_version", replica_metadata_version_str); + if (!replica_metadata_version_exists) + return -1; + + const Int32 metadata_version = parse(replica_metadata_version_str); + + if (metadata_version != 0 || replica_readonly) + { + /// No need to fix anything + return metadata_version; + } + + Coordination::Stat stat; + zookeeper->get(fs::path(zookeeper_path) / "metadata", &stat); + if (stat.version == 0) + { + /// No need to fix anything + return metadata_version; + } + + ReplicatedMergeTreeQueue & queue = storage.queue; + queue.pullLogsToQueue(zookeeper); + if (queue.getStatus().metadata_alters_in_queue != 0) + { + LOG_DEBUG(log, "No need to update metadata_version as there are ALTER_METADATA entries in the queue"); + return metadata_version; + } + + const Coordination::Requests ops = { + zkutil::makeSetRequest(fs::path(replica_path) / "metadata_version", std::to_string(stat.version), 0), + zkutil::makeCheckRequest(fs::path(zookeeper_path) / "metadata", stat.version), + }; + Coordination::Responses ops_responses; + const auto code = zookeeper->tryMulti(ops, ops_responses); + if (code == Coordination::Error::ZOK) + { + LOG_DEBUG(log, "Successfully set metadata_version to {}", stat.version); + return stat.version; + } + if (code != Coordination::Error::ZBADVERSION) + { + throw zkutil::KeeperException(code); + } + } + + /// Second attempt is only possible if metadata_version != 0 or metadata.version changed during the first attempt. + /// If metadata_version != 0, on second attempt we will return the new metadata_version. + /// If metadata.version changed, on second attempt we will either get metadata_version != 0 and return the new metadata_version or we will get metadata_alters_in_queue != 0 and return 0. + /// Either way, on second attempt this method should return. + throw Exception(ErrorCodes::LOGICAL_ERROR, "Failed to fix replica metadata_version in ZooKeeper after two attempts"); +} + void ReplicatedMergeTreeAttachThread::runImpl() { storage.setZooKeeper(); @@ -160,11 +222,11 @@ void ReplicatedMergeTreeAttachThread::runImpl() /// Just in case it was not removed earlier due to connection loss zookeeper->tryRemove(replica_path + "/flags/force_restore_data"); - String replica_metadata_version; - const bool replica_metadata_version_exists = zookeeper->tryGet(replica_path + "/metadata_version", replica_metadata_version); + const Int32 replica_metadata_version = fixReplicaMetadataVersionIfNeeded(zookeeper); + const bool replica_metadata_version_exists = replica_metadata_version != -1; if (replica_metadata_version_exists) { - storage.setInMemoryMetadata(metadata_snapshot->withMetadataVersion(parse(replica_metadata_version))); + storage.setInMemoryMetadata(metadata_snapshot->withMetadataVersion(replica_metadata_version)); } else { diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeAttachThread.h b/src/Storages/MergeTree/ReplicatedMergeTreeAttachThread.h index 250a5ed34d1..bfc97442598 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeAttachThread.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeAttachThread.h @@ -48,6 +48,8 @@ private: void runImpl(); void finalizeInitialization(); + + Int32 fixReplicaMetadataVersionIfNeeded(zkutil::ZooKeeperPtr zookeeper); }; } diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index 7d8018e7577..0fa2be6a389 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp @@ -2222,6 +2222,7 @@ ReplicatedMergeTreeQueue::Status ReplicatedMergeTreeQueue::getStatus() const res.inserts_in_queue = 0; res.merges_in_queue = 0; res.part_mutations_in_queue = 0; + res.metadata_alters_in_queue = 0; res.queue_oldest_time = 0; res.inserts_oldest_time = 0; res.merges_oldest_time = 0; @@ -2264,6 +2265,11 @@ ReplicatedMergeTreeQueue::Status ReplicatedMergeTreeQueue::getStatus() const res.oldest_part_to_mutate_to = entry->new_part_name; } } + + if (entry->type == LogEntry::ALTER_METADATA) + { + ++res.metadata_alters_in_queue; + } } return res; diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h index 91a23b6a3b6..9d3349663e2 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h @@ -473,6 +473,7 @@ public: UInt32 inserts_in_queue; UInt32 merges_in_queue; UInt32 part_mutations_in_queue; + UInt32 metadata_alters_in_queue; UInt32 queue_oldest_time; UInt32 inserts_oldest_time; UInt32 merges_oldest_time; diff --git a/src/Storages/MergeTree/registerStorageMergeTree.cpp b/src/Storages/MergeTree/registerStorageMergeTree.cpp index 75714cda4d9..8a80d5eebcd 100644 --- a/src/Storages/MergeTree/registerStorageMergeTree.cpp +++ b/src/Storages/MergeTree/registerStorageMergeTree.cpp @@ -215,7 +215,7 @@ static TableZnodeInfo extractZooKeeperPathAndReplicaNameFromEngineArgs( bool is_replicated_database = local_context->getClientInfo().query_kind == ClientInfo::QueryKind::SECONDARY_QUERY && DatabaseCatalog::instance().getDatabase(table_id.database_name)->getEngineName() == "Replicated"; - if (is_replicated_database && local_context->getSettingsRef().database_replicated_allow_replicated_engine_arguments == 0) + if (!query.attach && is_replicated_database && local_context->getSettingsRef().database_replicated_allow_replicated_engine_arguments == 0) { throw Exception(ErrorCodes::BAD_ARGUMENTS, "It's not allowed to specify explicit zookeeper_path and replica_name " @@ -223,7 +223,7 @@ static TableZnodeInfo extractZooKeeperPathAndReplicaNameFromEngineArgs( "specify them explicitly, enable setting " "database_replicated_allow_replicated_engine_arguments."); } - else if (is_replicated_database && local_context->getSettingsRef().database_replicated_allow_replicated_engine_arguments == 1) + else if (!query.attach && is_replicated_database && local_context->getSettingsRef().database_replicated_allow_replicated_engine_arguments == 1) { LOG_WARNING(&Poco::Logger::get("registerStorageMergeTree"), "It's not recommended to explicitly specify " "zookeeper_path and replica_name in ReplicatedMergeTree arguments"); @@ -238,7 +238,7 @@ static TableZnodeInfo extractZooKeeperPathAndReplicaNameFromEngineArgs( if (!ast_replica_name || ast_replica_name->value.getType() != Field::Types::String) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Replica name must be a string literal{}", verbose_help_message); - if (is_replicated_database && local_context->getSettingsRef().database_replicated_allow_replicated_engine_arguments == 2) + if (!query.attach && is_replicated_database && local_context->getSettingsRef().database_replicated_allow_replicated_engine_arguments == 2) { LOG_WARNING(&Poco::Logger::get("registerStorageMergeTree"), "Replacing user-provided ZooKeeper path and replica name ({}, {}) " "with default arguments", ast_zk_path->value.safeGet(), ast_replica_name->value.safeGet()); diff --git a/tests/integration/test_replicated_database/test.py b/tests/integration/test_replicated_database/test.py index 60a6e099b22..533eb601ad6 100644 --- a/tests/integration/test_replicated_database/test.py +++ b/tests/integration/test_replicated_database/test.py @@ -1549,3 +1549,19 @@ def test_all_groups_cluster(started_cluster): assert "bad_settings_node\ndummy_node\n" == bad_settings_node.query( "select host_name from system.clusters where name='all_groups.db_cluster' order by host_name" ) + + +def test_detach_attach_table(started_cluster): + main_node.query("DROP DATABASE IF EXISTS detach_attach_db SYNC") + main_node.query( + "CREATE DATABASE detach_attach_db ENGINE = Replicated('/clickhouse/databases/detach_attach_db');" + ) + main_node.query( + "CREATE TABLE detach_attach_db.detach_attach_table (k UInt64) ENGINE=ReplicatedMergeTree ORDER BY k;" + ) + main_node.query("INSERT INTO detach_attach_db.detach_attach_table VALUES (1);") + main_node.query("DETACH TABLE detach_attach_db.detach_attach_table PERMANENTLY;") + main_node.query("ATTACH TABLE detach_attach_db.detach_attach_table;") + assert ( + main_node.query("SELECT * FROM detach_attach_db.detach_attach_table;") == "1\n" + ) diff --git a/tests/queries/0_stateless/02706_show_columns.reference b/tests/queries/0_stateless/02706_show_columns.reference index 29e5329d63d..32cae685a59 100644 --- a/tests/queries/0_stateless/02706_show_columns.reference +++ b/tests/queries/0_stateless/02706_show_columns.reference @@ -45,3 +45,7 @@ uint64 UInt64 NO PRI SOR \N int32 Int32 NO \N str String NO \N uint64 UInt64 NO PRI SOR \N +--- SHOW COLUMNS FROM table with dots +int32 Nullable(Int32) YES \N +str String NO SOR \N +uint64 UInt64 NO PRI SOR \N diff --git a/tests/queries/0_stateless/02706_show_columns.sql b/tests/queries/0_stateless/02706_show_columns.sql index b1a907c5c71..a5164ce8022 100644 --- a/tests/queries/0_stateless/02706_show_columns.sql +++ b/tests/queries/0_stateless/02706_show_columns.sql @@ -90,3 +90,18 @@ SHOW COLUMNS FROM database_123456789abcde.tab; DROP DATABASE database_123456789abcde; DROP TABLE tab; + +DROP TABLE IF EXISTS `tab.with.dots`; +CREATE TABLE `tab.with.dots` +( + `uint64` UInt64, + `int32` Nullable(Int32) COMMENT 'example comment', + `str` String, + INDEX idx str TYPE set(1000) +) +ENGINE = MergeTree +PRIMARY KEY (uint64) +ORDER BY (uint64, str); +SELECT '--- SHOW COLUMNS FROM table with dots'; +SHOW COLUMNS FROM `tab.with.dots`; +DROP TABLE `tab.with.dots`; diff --git a/tests/queries/0_stateless/02724_show_indexes.reference b/tests/queries/0_stateless/02724_show_indexes.reference index ac0461fc506..27802b0991c 100644 --- a/tests/queries/0_stateless/02724_show_indexes.reference +++ b/tests/queries/0_stateless/02724_show_indexes.reference @@ -49,3 +49,10 @@ tbl 1 PRIMARY 1 a A 0 \N \N \N PRIMARY YES --- Short form tbl 1 mmi_idx 1 \N 0 \N \N \N MINMAX YES b tbl 1 PRIMARY 1 a A 0 \N \N \N PRIMARY YES +--- SHOW INDEX FROM table with dots +tab.with.dots 1 blf_idx 1 \N 0 \N \N \N BLOOM_FILTER YES d, b +tab.with.dots 1 mm1_idx 1 \N 0 \N \N \N MINMAX YES a, c, d +tab.with.dots 1 mm2_idx 1 \N 0 \N \N \N MINMAX YES c, d, e +tab.with.dots 1 PRIMARY 1 c A 0 \N \N \N PRIMARY YES +tab.with.dots 1 PRIMARY 2 a A 0 \N \N \N PRIMARY YES +tab.with.dots 1 set_idx 1 \N 0 \N \N \N SET YES e diff --git a/tests/queries/0_stateless/02724_show_indexes.sql b/tests/queries/0_stateless/02724_show_indexes.sql index 04a481fea4e..f7eb978b375 100644 --- a/tests/queries/0_stateless/02724_show_indexes.sql +++ b/tests/queries/0_stateless/02724_show_indexes.sql @@ -78,3 +78,22 @@ SHOW INDEX FROM database_123456789abcde.tbl; DROP DATABASE database_123456789abcde; DROP TABLE tbl; + +DROP TABLE IF EXISTS `tab.with.dots`; +CREATE TABLE `tab.with.dots` +( + a UInt64, + b UInt64, + c UInt64, + d UInt64, + e UInt64, + INDEX mm1_idx (a, c, d) TYPE minmax, + INDEX mm2_idx (c, d, e) TYPE minmax, + INDEX set_idx (e) TYPE set(100), + INDEX blf_idx (d, b) TYPE bloom_filter(0.8) +) +ENGINE = MergeTree +PRIMARY KEY (c, a); +SELECT '--- SHOW INDEX FROM table with dots'; +SHOW INDEX FROM `tab.with.dots`; +DROP TABLE `tab.with.dots`; diff --git a/tests/queries/0_stateless/03222_json_empty_as_default.reference b/tests/queries/0_stateless/03222_json_empty_as_default.reference new file mode 100644 index 00000000000..1a98f45577a --- /dev/null +++ b/tests/queries/0_stateless/03222_json_empty_as_default.reference @@ -0,0 +1,47 @@ +-- Simple types +-- { echoOn } +SELECT x FROM format(JSONEachRow, 'x Date', '{"x":""}'); +1970-01-01 +SELECT x FROM format(JSONEachRow, 'x Date32', '{"x":""}'); +1970-01-01 +SELECT toTimeZone(x, 'UTC') FROM format(JSONEachRow, 'x DateTime', '{"x":""}'); +1970-01-01 00:00:00 +SELECT toTimeZone(x, 'UTC') FROM format(JSONEachRow, 'x DateTime64', '{"x":""}'); +1970-01-01 00:00:00.000 +SELECT x FROM format(JSONEachRow, 'x IPv4', '{"x":""}'); +0.0.0.0 +SELECT x FROM format(JSONEachRow, 'x IPv6', '{"x":""}'); +:: +SELECT x FROM format(JSONEachRow, 'x UUID', '{"x":""}'); +00000000-0000-0000-0000-000000000000 +-- { echoOn } +SELECT COUNT(DISTINCT col) FROM table1; +1 +-- { echoOn } +SELECT * FROM table1 ORDER BY address ASC; +:: +2001:db8:3333:4444:5555:6666:7777:8888 +-- Nullable +-- { echoOn } +SELECT x FROM format(JSONEachRow, 'x Nullable(IPv6)', '{"x":""}'); +\N +-- Compound types +SELECT x FROM format(JSONEachRow, 'x Array(UUID)', '{"x":["00000000-0000-0000-0000-000000000000","b15f852c-c41a-4fd6-9247-1929c841715e",""]}'); +['00000000-0000-0000-0000-000000000000','b15f852c-c41a-4fd6-9247-1929c841715e','00000000-0000-0000-0000-000000000000'] +SELECT x FROM format(JSONEachRow, 'x Array(Nullable(IPv6))', '{"x":["",""]}'); +[NULL,NULL] +SELECT x FROM format(JSONEachRow, 'x Tuple(Date, IPv4, String)', '{"x":["", "", "abc"]}'); +('1970-01-01','0.0.0.0','abc') +SELECT x FROM format(JSONEachRow, 'x Map(String, IPv6)', '{"x":{"abc": ""}}'); +{'abc':'::'} +SELECT x FROM format(JSONEachRow, 'x Variant(Date, UUID)', '{"x":""}'); +\N +-- Deep composition +SELECT x FROM format(JSONEachRow, 'x Array(Array(IPv6))', '{"x":[["2001:db8:3333:4444:CCCC:DDDD:EEEE:FFFF", ""], ["", "2001:db8:3333:4444:5555:6666:7777:8888"]]}'); +[['2001:db8:3333:4444:cccc:dddd:eeee:ffff','::'],['::','2001:db8:3333:4444:5555:6666:7777:8888']] +SELECT x FROM format(JSONEachRow, 'x Variant(Date, Array(UUID))', '{"x":["", "b15f852c-c41a-4fd6-9247-1929c841715e"]}'); +['00000000-0000-0000-0000-000000000000','b15f852c-c41a-4fd6-9247-1929c841715e'] +SELECT x FROM format(JSONEachRow, 'x Tuple(Array(UUID), Tuple(UUID, Map(String, IPv6)))', '{"x":[[""], ["",{"abc":""}]]}'); +(['00000000-0000-0000-0000-000000000000'],('00000000-0000-0000-0000-000000000000',{'abc':'::'})) +SELECT x FROM format(JSONEachRow, 'x Map(Tuple(Date,IPv4), Variant(UUID,IPv6))', '{"x":{["",""]:""}}'); +{('1970-01-01','0.0.0.0'):NULL} diff --git a/tests/queries/0_stateless/03222_json_empty_as_default.sql b/tests/queries/0_stateless/03222_json_empty_as_default.sql new file mode 100644 index 00000000000..1243d450c2e --- /dev/null +++ b/tests/queries/0_stateless/03222_json_empty_as_default.sql @@ -0,0 +1,60 @@ +SET input_format_json_empty_as_default = 1, allow_experimental_variant_type = 1; + +-- Simple types +-- { echoOn } +SELECT x FROM format(JSONEachRow, 'x Date', '{"x":""}'); +SELECT x FROM format(JSONEachRow, 'x Date32', '{"x":""}'); +SELECT toTimeZone(x, 'UTC') FROM format(JSONEachRow, 'x DateTime', '{"x":""}'); +SELECT toTimeZone(x, 'UTC') FROM format(JSONEachRow, 'x DateTime64', '{"x":""}'); +SELECT x FROM format(JSONEachRow, 'x IPv4', '{"x":""}'); +SELECT x FROM format(JSONEachRow, 'x IPv6', '{"x":""}'); +SELECT x FROM format(JSONEachRow, 'x UUID', '{"x":""}'); +-- { echoOff } + +-- Simple type AggregateFunction +DROP TABLE IF EXISTS table1; +CREATE TABLE table1(col AggregateFunction(uniq, UInt64)) ENGINE=Memory(); +DROP TABLE IF EXISTS table2; +CREATE TABLE table2(UserID UInt64) ENGINE=Memory(); + +INSERT INTO table1 SELECT uniqState(UserID) FROM table2; +INSERT INTO table1 SELECT x FROM format(JSONEachRow, 'x AggregateFunction(uniq, UInt64)' AS T, '{"x":""}'); + +-- { echoOn } +SELECT COUNT(DISTINCT col) FROM table1; +-- { echoOff } + +DROP TABLE table1; +DROP TABLE table2; + +-- The setting input_format_defaults_for_omitted_fields determines the default value if enabled. +CREATE TABLE table1(address IPv6 DEFAULT toIPv6('2001:db8:3333:4444:5555:6666:7777:8888')) ENGINE=Memory(); + +SET input_format_defaults_for_omitted_fields = 0; +INSERT INTO table1 FORMAT JSONEachRow {"address":""}; + +SET input_format_defaults_for_omitted_fields = 1; +INSERT INTO table1 FORMAT JSONEachRow {"address":""}; + +-- { echoOn } +SELECT * FROM table1 ORDER BY address ASC; +-- { echoOff } + +DROP TABLE table1; + +-- Nullable +-- { echoOn } +SELECT x FROM format(JSONEachRow, 'x Nullable(IPv6)', '{"x":""}'); + +-- Compound types +SELECT x FROM format(JSONEachRow, 'x Array(UUID)', '{"x":["00000000-0000-0000-0000-000000000000","b15f852c-c41a-4fd6-9247-1929c841715e",""]}'); +SELECT x FROM format(JSONEachRow, 'x Array(Nullable(IPv6))', '{"x":["",""]}'); +SELECT x FROM format(JSONEachRow, 'x Tuple(Date, IPv4, String)', '{"x":["", "", "abc"]}'); +SELECT x FROM format(JSONEachRow, 'x Map(String, IPv6)', '{"x":{"abc": ""}}'); +SELECT x FROM format(JSONEachRow, 'x Variant(Date, UUID)', '{"x":""}'); + +-- Deep composition +SELECT x FROM format(JSONEachRow, 'x Array(Array(IPv6))', '{"x":[["2001:db8:3333:4444:CCCC:DDDD:EEEE:FFFF", ""], ["", "2001:db8:3333:4444:5555:6666:7777:8888"]]}'); +SELECT x FROM format(JSONEachRow, 'x Variant(Date, Array(UUID))', '{"x":["", "b15f852c-c41a-4fd6-9247-1929c841715e"]}'); +SELECT x FROM format(JSONEachRow, 'x Tuple(Array(UUID), Tuple(UUID, Map(String, IPv6)))', '{"x":[[""], ["",{"abc":""}]]}'); +SELECT x FROM format(JSONEachRow, 'x Map(Tuple(Date,IPv4), Variant(UUID,IPv6))', '{"x":{["",""]:""}}'); diff --git a/tests/queries/0_stateless/03222_json_empty_as_default_small_read_buffer.reference b/tests/queries/0_stateless/03222_json_empty_as_default_small_read_buffer.reference new file mode 100644 index 00000000000..8176d7895d8 --- /dev/null +++ b/tests/queries/0_stateless/03222_json_empty_as_default_small_read_buffer.reference @@ -0,0 +1,8 @@ +Array(UUID) +{"x":["00000000-0000-0000-0000-000000000000","b15f852c-c41a-4fd6-9247-1929c841715e","00000000-0000-0000-0000-000000000000"]} +{"x":["00000000-0000-0000-0000-000000000000","b15f852c-c41a-4fd6-9247-1929c841715e","00000000-0000-0000-0000-000000000000"]} +{"x":["00000000-0000-0000-0000-000000000000","b15f852c-c41a-4fd6-9247-1929c841715e","00000000-0000-0000-0000-000000000000"]} +Tuple(Array(UUID), Tuple(UUID, Map(String, IPv6))) +{"x":[["00000000-0000-0000-0000-000000000000"],["00000000-0000-0000-0000-000000000000",{"abc":"::"}]]} +{"x":[["00000000-0000-0000-0000-000000000000"],["00000000-0000-0000-0000-000000000000",{"abc":"::"}]]} +{"x":[["00000000-0000-0000-0000-000000000000"],["00000000-0000-0000-0000-000000000000",{"abc":"::"}]]} diff --git a/tests/queries/0_stateless/03222_json_empty_as_default_small_read_buffer.sh b/tests/queries/0_stateless/03222_json_empty_as_default_small_read_buffer.sh new file mode 100755 index 00000000000..6b69fb2e9dc --- /dev/null +++ b/tests/queries/0_stateless/03222_json_empty_as_default_small_read_buffer.sh @@ -0,0 +1,31 @@ +#!/usr/bin/env bash +# Tags: no-parallel + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +DATA_FILE=$CLICKHOUSE_TEST_UNIQUE_NAME.json + +# Wrapper for clickhouse-client to always output in JSONEachRow format, that +# way format settings will not affect output. +function clickhouse_local() +{ + $CLICKHOUSE_LOCAL --output-format JSONEachRow "$@" +} + +echo 'Array(UUID)' +echo '{"x":["00000000-0000-0000-0000-000000000000","b15f852c-c41a-4fd6-9247-1929c841715e",""]}' > $DATA_FILE +# Use increasingly smaller read buffers. +clickhouse_local -q "SELECT x FROM file('$DATA_FILE', 'JSONEachRow', 'x Array(UUID)') SETTINGS input_format_json_empty_as_default=1, input_format_parallel_parsing=0, storage_file_read_method='read', max_read_buffer_size=4" +clickhouse_local -q "SELECT x FROM file('$DATA_FILE', 'JSONEachRow', 'x Array(UUID)') SETTINGS input_format_json_empty_as_default=1, input_format_parallel_parsing=0, storage_file_read_method='read', max_read_buffer_size=2" +clickhouse_local -q "SELECT x FROM file('$DATA_FILE', 'JSONEachRow', 'x Array(UUID)') SETTINGS input_format_json_empty_as_default=1, input_format_parallel_parsing=0, storage_file_read_method='read', max_read_buffer_size=1" + +echo 'Tuple(Array(UUID), Tuple(UUID, Map(String, IPv6)))' +echo '{"x":[[""], ["",{"abc":""}]]}' > $DATA_FILE +# Use increasingly smaller read buffers. +clickhouse_local -q "SELECT x FROM file('$DATA_FILE', 'JSONEachRow', 'x Tuple(Array(UUID), Tuple(UUID, Map(String, IPv6)))') SETTINGS input_format_json_empty_as_default=1, input_format_parallel_parsing=0, storage_file_read_method='read', max_read_buffer_size=16" +clickhouse_local -q "SELECT x FROM file('$DATA_FILE', 'JSONEachRow', 'x Tuple(Array(UUID), Tuple(UUID, Map(String, IPv6)))') SETTINGS input_format_json_empty_as_default=1, input_format_parallel_parsing=0, storage_file_read_method='read', max_read_buffer_size=8" +clickhouse_local -q "SELECT x FROM file('$DATA_FILE', 'JSONEachRow', 'x Tuple(Array(UUID), Tuple(UUID, Map(String, IPv6)))') SETTINGS input_format_json_empty_as_default=1, input_format_parallel_parsing=0, storage_file_read_method='read', max_read_buffer_size=1" + +rm $DATA_FILE