diff --git a/base/base/StringRef.h b/base/base/StringRef.h index 9a368f21e7d..5ee197021ca 100644 --- a/base/base/StringRef.h +++ b/base/base/StringRef.h @@ -61,11 +61,6 @@ struct StringRef constexpr explicit operator std::string_view() const { return std::string_view(data, size); } }; -/// Here constexpr doesn't implicate inline, see https://www.viva64.com/en/w/v1043/ -/// nullptr can't be used because the StringRef values are used in SipHash's pointer arithmetic -/// and the UBSan thinks that something like nullptr + 8 is UB. -constexpr const inline char empty_string_ref_addr{}; -constexpr const inline StringRef EMPTY_STRING_REF{&empty_string_ref_addr, 0}; using StringRefs = std::vector; diff --git a/src/Columns/ColumnNullable.cpp b/src/Columns/ColumnNullable.cpp index 809024316bf..20c9965c8c3 100644 --- a/src/Columns/ColumnNullable.cpp +++ b/src/Columns/ColumnNullable.cpp @@ -24,6 +24,7 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; extern const int ILLEGAL_COLUMN; extern const int SIZES_OF_NESTED_COLUMNS_ARE_INCONSISTENT; + extern const int NOT_IMPLEMENTED; } @@ -40,6 +41,14 @@ ColumnNullable::ColumnNullable(MutableColumnPtr && nested_column_, MutableColumn throw Exception{"ColumnNullable cannot have constant null map", ErrorCodes::ILLEGAL_COLUMN}; } +StringRef ColumnNullable::getDataAt(size_t n) const +{ + if (!isNullAt(n)) + return getNestedColumn().getDataAt(n); + + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method getDataAt is not supported for {} in case if value is NULL", getName()); +} + void ColumnNullable::updateHashWithValue(size_t n, SipHash & hash) const { const auto & arr = getNullMapData(); diff --git a/src/Columns/ColumnNullable.h b/src/Columns/ColumnNullable.h index e832f6d20e5..064becbf2f4 100644 --- a/src/Columns/ColumnNullable.h +++ b/src/Columns/ColumnNullable.h @@ -59,19 +59,7 @@ public: bool getBool(size_t n) const override { return isNullAt(n) ? false : nested_column->getBool(n); } UInt64 get64(size_t n) const override { return nested_column->get64(n); } bool isDefaultAt(size_t n) const override { return isNullAt(n); } - - /** - * If isNullAt(n) returns false, returns the nested column's getDataAt(n), otherwise returns a special value - * EMPTY_STRING_REF indicating that data is not present. - */ - StringRef getDataAt(size_t n) const override - { - if (isNullAt(n)) - return EMPTY_STRING_REF; - - return getNestedColumn().getDataAt(n); - } - + StringRef getDataAt(size_t) const override; /// Will insert null value if pos=nullptr void insertData(const char * pos, size_t length) override; StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const override; diff --git a/src/Columns/ColumnUnique.h b/src/Columns/ColumnUnique.h index 58891e30e12..d3ab87410f3 100644 --- a/src/Columns/ColumnUnique.h +++ b/src/Columns/ColumnUnique.h @@ -136,15 +136,16 @@ public: UInt128 getHash() const override { return hash.getHash(*getRawColumnPtr()); } + /// This is strange. Please remove this method as soon as possible. std::optional getOrFindValueIndex(StringRef value) const override { if (std::optional res = reverse_index.getIndex(value); res) return res; - auto& nested = *getNestedColumn(); + const IColumn & nested = *getNestedColumn(); for (size_t i = 0; i < nested.size(); ++i) - if (nested.getDataAt(i) == value) + if (!nested.isNullAt(i) && nested.getDataAt(i) == value) return i; return {}; diff --git a/src/Columns/IColumnUnique.h b/src/Columns/IColumnUnique.h index 9502daaf65b..97c9119ebd6 100644 --- a/src/Columns/IColumnUnique.h +++ b/src/Columns/IColumnUnique.h @@ -65,30 +65,12 @@ public: virtual size_t uniqueDeserializeAndInsertFromArena(const char * pos, const char *& new_pos) = 0; - /// Returns dictionary hash which is sipHash is applied to each row of nested column. + /// Returns dictionary hash which is SipHash is applied to each row of nested column. virtual UInt128 getHash() const = 0; const char * getFamilyName() const override { return "ColumnUnique"; } TypeIndex getDataType() const override { return getNestedColumn()->getDataType(); } - /** - * Given some value (usually, of type @e ColumnType) @p value that is convertible to DB::StringRef, obtains its - * index in the DB::ColumnUnique::reverse_index hashtable. - * - * The reverse index (StringRef => UInt64) is built lazily, so there are two variants: - * - On the function call it's present. Therefore we obtain the index in O(1). - * - The reverse index is absent. We search for the index linearly. - * - * @see DB::ReverseIndex - * @see DB::ColumnUnique - * - * The most common example uses https://clickhouse.com/docs/en/sql-reference/data-types/lowcardinality/ columns. - * Consider data type @e LC(String). The inner type here is @e String which is more or less a contiguous memory - * region, so it can be easily represented as a @e StringRef. So we pass that ref to this function and get its - * index in the dictionary, which can be used to operate with the indices column. - */ - virtual std::optional getOrFindValueIndex(StringRef value) const = 0; - void insert(const Field &) override { throw Exception("Method insert is not supported for ColumnUnique.", ErrorCodes::NOT_IMPLEMENTED); @@ -190,6 +172,23 @@ public: { throw Exception("Method hasEqualValues is not supported for ColumnUnique.", ErrorCodes::NOT_IMPLEMENTED); } + + /** Given some value (usually, of type @e ColumnType) @p value that is convertible to DB::StringRef, obtains its + * index in the DB::ColumnUnique::reverse_index hashtable. + * + * The reverse index (StringRef => UInt64) is built lazily, so there are two variants: + * - On the function call it's present. Therefore we obtain the index in O(1). + * - The reverse index is absent. We search for the index linearly. + * + * @see DB::ReverseIndex + * @see DB::ColumnUnique + * + * The most common example uses https://clickhouse.com/docs/en/sql-reference/data-types/lowcardinality/ columns. + * Consider data type @e LC(String). The inner type here is @e String which is more or less a contiguous memory + * region, so it can be easily represented as a @e StringRef. So we pass that ref to this function and get its + * index in the dictionary, which can be used to operate with the indices column. + */ + virtual std::optional getOrFindValueIndex(StringRef value) const = 0; }; using ColumnUniquePtr = IColumnUnique::ColumnUniquePtr; diff --git a/src/Functions/array/arrayIndex.h b/src/Functions/array/arrayIndex.h index 0dbbe5e41b6..7f0946e3e50 100644 --- a/src/Functions/array/arrayIndex.h +++ b/src/Functions/array/arrayIndex.h @@ -740,42 +740,34 @@ private: const auto [null_map_data, null_map_item] = getNullMaps(arguments); - const IColumn& col_arg = *arguments[1].column.get(); - - if (const ColumnConst * const col_arg_const = checkAndGetColumn(col_arg)) + if (const ColumnConst * col_arg_const = checkAndGetColumn(*arguments[1].column)) { - const IColumnUnique& col_lc_dict = col_lc->getDictionary(); - - const bool different_inner_types = col_lc_dict.isNullable() - ? !col_arg_const->structureEquals(*col_lc_dict.getNestedColumn().get()) - : true; // Can't compare so ignore this check - - const bool use_cloned_arg = col_arg_const->isNumeric() - // outer types do not match - && !col_arg_const->structureEquals(col_lc_dict) - // inner types do not match (like A and Nullable(B) or A and Const(B)); - && different_inner_types; + const IColumnUnique & col_lc_dict = col_lc->getDictionary(); const DataTypeArray * const array_type = checkAndGetDataType(arguments[0].type.get()); const DataTypePtr target_type_ptr = recursiveRemoveLowCardinality(array_type->getNestedType()); - const ColumnPtr col_arg_cloned = use_cloned_arg - ? castColumn(arguments[1], target_type_ptr) - : col_arg_const->getPtr(); + ColumnPtr col_arg_cloned = castColumn( + {col_arg_const->getDataColumnPtr(), arguments[1].type, arguments[1].name}, target_type_ptr); - const StringRef elem = col_arg_cloned->getDataAt(0); ResultColumnPtr col_result = ResultColumnType::create(); - UInt64 index = 0; - if (elem != EMPTY_STRING_REF) + if (!col_arg_cloned->isNullAt(0)) { + if (col_arg_cloned->isNullable()) + col_arg_cloned = checkAndGetColumn(*col_arg_cloned)->getNestedColumnPtr(); + + StringRef elem = col_arg_cloned->getDataAt(0); + if (std::optional maybe_index = col_lc_dict.getOrFindValueIndex(elem); maybe_index) + { index = *maybe_index; + } else { const size_t offsets_size = col_array->getOffsets().size(); - auto& data = col_result->getData(); + auto & data = col_result->getData(); data.resize_fill(offsets_size); @@ -786,7 +778,7 @@ private: Impl::Main::vector( col_lc->getIndexes(), col_array->getOffsets(), - index, + index, /** Assuming LowCardinality has index of NULL always as zero. */ col_result->getData(), null_map_data, null_map_item); @@ -800,9 +792,9 @@ private: const NullMap * const null_map_left_casted = &left_nullable.getNullMapColumn().getData(); - const IColumn& left_ptr = left_nullable.getNestedColumn(); + const IColumn & left_ptr = left_nullable.getNestedColumn(); - const ColumnPtr right_casted = col_arg.convertToFullColumnIfLowCardinality(); + const ColumnPtr right_casted = arguments[1].column->convertToFullColumnIfLowCardinality(); const ColumnNullable * const right_nullable = checkAndGetColumn(right_casted.get()); const NullMap * const null_map_right_casted = right_nullable @@ -825,17 +817,17 @@ private: } else // LowCardinality(T) and U, T not Nullable { - if (col_arg.isNullable()) + if (arguments[1].column->isNullable()) return nullptr; - if (const auto* const arg_lc = checkAndGetColumn(&col_arg); + if (const auto* const arg_lc = checkAndGetColumn(arguments[1].column.get()); arg_lc && arg_lc->isNullable()) return nullptr; // LowCardinality(T) and U (possibly LowCardinality(V)) const ColumnPtr left_casted = col_lc->convertToFullColumnIfLowCardinality(); - const ColumnPtr right_casted = col_arg.convertToFullColumnIfLowCardinality(); + const ColumnPtr right_casted = arguments[1].column->convertToFullColumnIfLowCardinality(); ExecutionData data = { diff --git a/src/Processors/Formats/Impl/RawBLOBRowOutputFormat.cpp b/src/Processors/Formats/Impl/RawBLOBRowOutputFormat.cpp index 1d0e987f0c4..1627bb0cad0 100644 --- a/src/Processors/Formats/Impl/RawBLOBRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/RawBLOBRowOutputFormat.cpp @@ -17,8 +17,11 @@ RawBLOBRowOutputFormat::RawBLOBRowOutputFormat( void RawBLOBRowOutputFormat::writeField(const IColumn & column, const ISerialization &, size_t row_num) { - std::string_view value = column.getDataAt(row_num).toView(); - out.write(value.data(), value.size()); + if (!column.isNullAt(row_num)) + { + auto value = column.getDataAt(row_num); + out.write(value.data, value.size); + } } @@ -35,4 +38,3 @@ void registerOutputFormatRawBLOB(FormatFactory & factory) } } - diff --git a/tests/queries/0_stateless/02419_contingency_array_nullable.reference b/tests/queries/0_stateless/02419_contingency_array_nullable.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02419_contingency_array_nullable.sql b/tests/queries/0_stateless/02419_contingency_array_nullable.sql new file mode 100644 index 00000000000..5d56e259d2f --- /dev/null +++ b/tests/queries/0_stateless/02419_contingency_array_nullable.sql @@ -0,0 +1 @@ +SELECT contingency(1, [1, NULL]); -- { serverError 48 }