Merge pull request #39000 from ClickHouse/avx-enablement

Avx enablement
This commit is contained in:
Alexey Milovidov 2022-07-30 04:51:07 +03:00 committed by GitHub
commit 8f348edbbd
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 97 additions and 29 deletions

View File

@ -16,7 +16,7 @@ option (ENABLE_SSE41 "Use SSE4.1 instructions on x86_64" 1)
option (ENABLE_SSE42 "Use SSE4.2 instructions on x86_64" 1)
option (ENABLE_PCLMULQDQ "Use pclmulqdq instructions on x86_64" 1)
option (ENABLE_POPCNT "Use popcnt instructions on x86_64" 1)
option (ENABLE_AVX "Use AVX instructions on x86_64" 0)
option (ENABLE_AVX "Use AVX instructions on x86_64" 1)
option (ENABLE_AVX2 "Use AVX2 instructions on x86_64" 0)
option (ENABLE_AVX512 "Use AVX512 instructions on x86_64" 0)
option (ENABLE_AVX512_VBMI "Use AVX512_VBMI instruction on x86_64 (depends on ENABLE_AVX512)" 0)

View File

@ -191,27 +191,29 @@ struct ConvertImpl
vec_null_map_to = &col_null_map_to->getData();
}
bool result_is_bool = isBool(result_type);
for (size_t i = 0; i < input_rows_count; ++i)
if constexpr (std::is_same_v<ToDataType, DataTypeUInt8>)
{
if constexpr (std::is_same_v<ToDataType, DataTypeUInt8>)
if (isBool(result_type))
{
if (result_is_bool)
for (size_t i = 0; i < input_rows_count; ++i)
{
vec_to[i] = vec_from[i] != FromFieldType(0);
continue;
}
goto done;
}
}
if constexpr (std::is_same_v<FromDataType, DataTypeUUID> != std::is_same_v<ToDataType, DataTypeUUID>)
if constexpr (std::is_same_v<FromDataType, DataTypeUUID> != std::is_same_v<ToDataType, DataTypeUUID>)
{
throw Exception("Conversion between numeric types and UUID is not supported. Probably the passed UUID is unquoted", ErrorCodes::NOT_IMPLEMENTED);
}
else
{
if constexpr (IsDataTypeDecimal<FromDataType> || IsDataTypeDecimal<ToDataType>)
{
throw Exception("Conversion between numeric types and UUID is not supported. Probably the passed UUID is unquoted", ErrorCodes::NOT_IMPLEMENTED);
}
else
{
if constexpr (IsDataTypeDecimal<FromDataType> || IsDataTypeDecimal<ToDataType>)
if constexpr (std::is_same_v<Additions, AccurateOrNullConvertStrategyAdditions>)
{
if constexpr (std::is_same_v<Additions, AccurateOrNullConvertStrategyAdditions>)
for (size_t i = 0; i < input_rows_count; ++i)
{
ToFieldType result;
bool convert_result = false;
@ -231,7 +233,10 @@ struct ConvertImpl
(*vec_null_map_to)[i] = true;
}
}
else
}
else
{
for (size_t i = 0; i < input_rows_count; ++i)
{
if constexpr (IsDataTypeDecimal<FromDataType> && IsDataTypeDecimal<ToDataType>)
vec_to[i] = convertDecimals<FromDataType, ToDataType>(vec_from[i], col_from->getScale(), col_to->getScale());
@ -243,10 +248,13 @@ struct ConvertImpl
throw Exception("Unsupported data type in conversion function", ErrorCodes::CANNOT_CONVERT_TYPE);
}
}
else
}
else
{
/// If From Data is Nan or Inf and we convert to integer type, throw exception
if constexpr (std::is_floating_point_v<FromFieldType> && !std::is_floating_point_v<ToFieldType>)
{
/// If From Data is Nan or Inf and we convert to integer type, throw exception
if constexpr (std::is_floating_point_v<FromFieldType> && !std::is_floating_point_v<ToFieldType>)
for (size_t i = 0; i < input_rows_count; ++i)
{
if (!isFinite(vec_from[i]))
{
@ -254,15 +262,46 @@ struct ConvertImpl
{
vec_to[i] = 0;
(*vec_null_map_to)[i] = true;
continue;
}
else
throw Exception("Unexpected inf or nan to integer conversion", ErrorCodes::CANNOT_CONVERT_TYPE);
}
}
else
{
if constexpr (std::is_same_v<Additions, AccurateOrNullConvertStrategyAdditions>
|| std::is_same_v<Additions, AccurateConvertStrategyAdditions>)
{
bool convert_result = accurate::convertNumeric(vec_from[i], vec_to[i]);
if constexpr (std::is_same_v<Additions, AccurateOrNullConvertStrategyAdditions>
|| std::is_same_v<Additions, AccurateConvertStrategyAdditions>)
if (!convert_result)
{
if (std::is_same_v<Additions, AccurateOrNullConvertStrategyAdditions>)
{
vec_to[i] = 0;
(*vec_null_map_to)[i] = true;
}
else
{
throw Exception(
"Value in column " + named_from.column->getName() + " cannot be safely converted into type "
+ result_type->getName(),
ErrorCodes::CANNOT_CONVERT_TYPE);
}
}
}
else
{
vec_to[i] = static_cast<ToFieldType>(vec_from[i]);
}
}
}
goto done;
}
if constexpr (std::is_same_v<Additions, AccurateOrNullConvertStrategyAdditions>
|| std::is_same_v<Additions, AccurateConvertStrategyAdditions>)
{
for (size_t i = 0; i < input_rows_count; ++i)
{
bool convert_result = accurate::convertNumeric(vec_from[i], vec_to[i]);
@ -282,14 +321,38 @@ struct ConvertImpl
}
}
}
}
else
{
if constexpr (std::is_same_v<FromDataType, DataTypeUInt64> && std::is_same_v<ToDataType, DataTypeFloat32>)
{
/// Turns out that when ClickHouse is compiled with AVX1 or AVX2 instructions, Clang's autovectorizer produces
/// code for UInt64-to-Float32 conversion which is only ~50% as fast as scalar code. Interestingly, scalar code
/// is as fast as code compiled for SSE4.2, so we might as well disable vectorization. This situation
/// may change with AVX512 which has a dedicated instruction for that use case (_mm512_cvtepi64_ps).
#if defined(__x86_64__)
# ifdef __clang__
# pragma clang loop vectorize(disable) interleave(disable)
# endif
#endif
for (size_t i = 0; i < input_rows_count; ++i)
{
vec_to[i] = static_cast<ToFieldType>(vec_from[i]);
}
}
else
{
vec_to[i] = static_cast<ToFieldType>(vec_from[i]);
for (size_t i = 0; i < input_rows_count; ++i)
{
vec_to[i] = static_cast<ToFieldType>(vec_from[i]);
}
}
}
}
}
done:
if constexpr (std::is_same_v<Additions, AccurateOrNullConvertStrategyAdditions>)
return ColumnNullable::create(std::move(col_to), std::move(col_null_map_to));
else

View File

@ -1,7 +0,0 @@
Instruction check fail. The CPU does not support SSSE3 instruction set.
Instruction check fail. The CPU does not support SSE4.1 instruction set.
Instruction check fail. The CPU does not support SSE4.2 instruction set.
Instruction check fail. The CPU does not support POPCNT instruction set.
<jemalloc>: MADV_DONTNEED does not work (memset will be used instead)
<jemalloc>: (This is the expected behaviour if you are running under QEMU)
1

View File

@ -2,6 +2,18 @@
# Tags: no-tsan, no-asan, no-ubsan, no-msan, no-debug, no-fasttest, no-cpu-aarch64
# Tag no-fasttest: avoid dependency on qemu -- inconvenient when running locally
# More than a decade after AVX was released, AVX is still not supported by QEMU, even if "-cpu help" pretends to. As a result, we cannot use
# QEMU to verify that a ClickHouse binary compiled for a SIMD level up to AVX runs on a system with a SIMD level up to AVX. The alternative
# is to disassemble the binary and grep for unwanted instructions (e.g. AVX512) which is just too fragile ...
#
# https://gitlab.com/qemu-project/qemu/-/issues/164
# https://www.mail-archive.com/qemu-devel@nongnu.org/msg713932.html
# https://lore.kernel.org/all/CAObpvQmejWBh+RNz2vhk16-kcY_QveM_pSmM5ZeWqWv1d8AJzQ@mail.gmail.com/T/
exit 0
# keeping the original test because it is instructive and maybe QEMU will be fixed at some point ...
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh