Disable vectorization for uint64 --> float32 cast

2024-12-04 05:22:17 +00:00 · 2022-07-22 10:59:56 +00:00 · 2022-07-22 10:59:56 +00:00 · 6a631426b7
commit 6a631426b7
parent cad0e7a62c
1 changed files with 21 additions and 2 deletions
--- a/src/Functions/FunctionsConversion.h
+++ b/src/Functions/FunctionsConversion.h
@ -324,11 +324,30 @@ struct ConvertImpl
                    }
                    else
                    {
                        if constexpr (std::is_same_v<FromDataType, DataTypeUInt64> && std::is_same_v<ToDataType, DataTypeFloat32>)
                        {
                            /// Turns out that when ClickHouse is compiled with AVX1 or AVX2 instructions, Clang's autovectorizer produces
                            /// code for UInt64-to-Float32 conversion which is only ~50% as fast as scalar code. Interestingly, scalar code
                            /// is as fast as code compiled for SSE4.2, so we might as well disable vectorization. This situation
                            /// may change with AVX512 which has a dedicated instruction for that use case (_mm512_cvtepu64_ps).
 #if defined(__x86_64__)
 #  ifdef __clang__
 #    pragma clang loop vectorize(disable) interleave(disable)
 #  endif
 #endif
                            for (size_t i = 0; i < input_rows_count; ++i)
                            {
                                vec_to[i] = static_cast<ToFieldType>(vec_from[i]);
                            }
                        }
                        else
                        {
                            for (size_t i = 0; i < input_rows_count; ++i)
                            {
                                vec_to[i] = static_cast<ToFieldType>(vec_from[i]);
                            }
                        }
                    }
                }
            }