diff --git a/src/Functions/FunctionsHashing.h b/src/Functions/FunctionsHashing.h index 75505a173cd..e824546f220 100644 --- a/src/Functions/FunctionsHashing.h +++ b/src/Functions/FunctionsHashing.h @@ -1112,7 +1112,9 @@ private: { ToType h; if constexpr (std::endian::native == std::endian::little) + { h = apply(key, reinterpret_cast(&vec_from[i]), sizeof(vec_from[i])); + } else { char tmp_buffer[sizeof(vec_from[i])]; @@ -1131,7 +1133,9 @@ private: ToType h; if constexpr (std::endian::native == std::endian::little) + { h = apply(key, reinterpret_cast(&value), sizeof(value)); + } else { char tmp_buffer[sizeof(value)]; @@ -1271,7 +1275,7 @@ private: { /// NOTE: here, of course, you can do without the materialization of the column. ColumnPtr full_column = col_from_const->convertToFullColumn(); - executeArray(key, type, &*full_column, vec_to); + executeArray(key, type, full_column.get(), vec_to); } else throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", @@ -1283,6 +1287,10 @@ private: { WhichDataType which(from_type); + if (icolumn->size() != vec_to.size()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Argument column '{}' size {} doesn't match result column size {} of function {}", + icolumn->getName(), icolumn->size(), vec_to.size(), getName()); + if (which.isUInt8()) executeIntType(key, icolumn, vec_to); else if (which.isUInt16()) executeIntType(key, icolumn, vec_to); else if (which.isUInt32()) executeIntType(key, icolumn, vec_to); @@ -1343,10 +1351,9 @@ private: const auto & type_map = assert_cast(*type); executeForArgument(key, type_map.getNestedType().get(), map->getNestedColumnPtr().get(), vec_to, is_first); } - else if (const auto * const_map = checkAndGetColumnConstData(column)) + else if (const auto * const_map = checkAndGetColumnConst(column)) { - const auto & type_map = assert_cast(*type); - executeForArgument(key, type_map.getNestedType().get(), const_map->getNestedColumnPtr().get(), vec_to, is_first); + executeForArgument(key, type, const_map->convertToFullColumnIfConst().get(), vec_to, is_first); } else { @@ -1382,8 +1389,7 @@ public: ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { - size_t rows = input_rows_count; - auto col_to = ColumnVector::create(rows); + auto col_to = ColumnVector::create(input_rows_count); typename ColumnVector::Container & vec_to = col_to->getData(); @@ -1395,7 +1401,7 @@ public: if (arguments.size() <= first_data_argument) { /// Return a fixed random-looking magic number when input is empty - vec_to.assign(rows, static_cast(0xe28dbde7fe22e41c)); + vec_to.assign(input_rows_count, static_cast(0xe28dbde7fe22e41c)); } KeyType key{}; diff --git a/src/Functions/IFunction.cpp b/src/Functions/IFunction.cpp index 1c30dee0482..1f25794536b 100644 --- a/src/Functions/IFunction.cpp +++ b/src/Functions/IFunction.cpp @@ -26,6 +26,7 @@ # pragma GCC diagnostic pop #endif + namespace DB { @@ -66,12 +67,12 @@ ColumnPtr replaceLowCardinalityColumnsByNestedAndGetDictionaryIndexes( if (!low_cardinality_type) throw Exception(ErrorCodes::LOGICAL_ERROR, - "Incompatible type for low cardinality column: {}", + "Incompatible type for LowCardinality column: {}", column.type->getName()); if (can_be_executed_on_default_arguments) { - /// Normal case, when function can be executed on values's default. + /// Normal case, when function can be executed on values' default. column.column = low_cardinality_column->getDictionary().getNestedColumn(); indexes = low_cardinality_column->getIndexesPtr(); } @@ -280,6 +281,7 @@ ColumnPtr IExecutableFunction::executeWithoutSparseColumns(const ColumnsWithType auto res = executeWithoutLowCardinalityColumns(columns_without_low_cardinality, dictionary_type, new_input_rows_count, dry_run); bool res_is_constant = isColumnConst(*res); + auto keys = res_is_constant ? res->cloneResized(1)->convertToFullColumnIfConst() : res; diff --git a/src/Functions/map.cpp b/src/Functions/map.cpp index 3160c5ddb43..7fd4d3aa6d5 100644 --- a/src/Functions/map.cpp +++ b/src/Functions/map.cpp @@ -332,7 +332,7 @@ public: } size_t col_key_size = sub_map_column->size(); - auto column = is_const? ColumnConst::create(std::move(sub_map_column), std::move(col_key_size)) : std::move(sub_map_column); + auto column = is_const ? ColumnConst::create(std::move(sub_map_column), std::move(col_key_size)) : std::move(sub_map_column); ColumnsWithTypeAndName new_arguments = { diff --git a/tests/queries/0_stateless/02673_map_hashing_msan.reference b/tests/queries/0_stateless/02673_map_hashing_msan.reference new file mode 100644 index 00000000000..d3fbc1377d4 --- /dev/null +++ b/tests/queries/0_stateless/02673_map_hashing_msan.reference @@ -0,0 +1,6 @@ +4786021384179797717 +5368498105280294197 +42 15687122600100720591 +42 15687122600100720591 +42 15687122600100720591 +\N diff --git a/tests/queries/0_stateless/02673_map_hashing_msan.sql b/tests/queries/0_stateless/02673_map_hashing_msan.sql new file mode 100644 index 00000000000..2fe3620e68c --- /dev/null +++ b/tests/queries/0_stateless/02673_map_hashing_msan.sql @@ -0,0 +1,7 @@ +SELECT cityHash64(map(1, 'Hello'), CAST(materialize('World') AS LowCardinality(String))); +SELECT cityHash64(map(), CAST(materialize('') AS LowCardinality(Nullable(String)))); +SELECT materialize(42) as last_element, cityHash64(map(), CAST(materialize('') AS LowCardinality(Nullable(String))), last_element) from numbers(3); + +SET allow_suspicious_low_cardinality_types = 1; +CREATE TEMPORARY TABLE datetime__fuzz_14 (`d` LowCardinality(Nullable(UInt128))); +SELECT max(mapPopulateSeries(mapPopulateSeries(map(toInt64(1048), toInt64(9223), 3, -2147))), toInt64(1048), map('11', 257, '', NULL), cityHash64(*)) > NULL FROM (SELECT max(cityHash64(mapPopulateSeries(mapPopulateSeries(map(toInt64(1048), toInt64(2147), 655, -2147))), *)) > NULL, map(toInt64(-2147), toInt64(100.0001), -2147, NULL), mapPopulateSeries(map(toInt64(1024), toInt64(1048), 1048, -1)), map(toInt64(256), toInt64(NULL), -1, NULL), quantile(0.0001)(d) FROM datetime__fuzz_14 WITH TOTALS);