mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-12-01 20:12:02 +00:00
lz4 decompress: dynamic dispatch with TargetSpecific
This commit is contained in:
parent
397a4ac888
commit
c9e15017ac
@ -4,7 +4,7 @@
|
|||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include <Core/Defines.h>
|
#include <Core/Defines.h>
|
||||||
#include <Common/Stopwatch.h>
|
#include <Common/Stopwatch.h>
|
||||||
#include <Common/CpuId.h>
|
#include <Common/TargetSpecific.h>
|
||||||
#include <base/types.h>
|
#include <base/types.h>
|
||||||
#include <base/unaligned.h>
|
#include <base/unaligned.h>
|
||||||
|
|
||||||
@ -16,7 +16,7 @@
|
|||||||
#include <tmmintrin.h>
|
#include <tmmintrin.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(__AVX512VBMI__)
|
#if USE_MULTITARGET_CODE
|
||||||
#include <immintrin.h>
|
#include <immintrin.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -408,9 +408,9 @@ inline void copyOverlap32(UInt8 * op, const UInt8 *& match, const size_t offset)
|
|||||||
match += shift4[offset];
|
match += shift4[offset];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
DECLARE_AVX512VBMI_SPECIFIC_CODE(
|
||||||
inline void copyOverlap32Shuffle(UInt8 * op, const UInt8 *& match, const size_t offset)
|
inline void copyOverlap32Shuffle(UInt8 * op, const UInt8 *& match, const size_t offset)
|
||||||
{
|
{
|
||||||
#if defined(__AVX512VBMI__) && !defined(MEMORY_SANITIZER)
|
|
||||||
static constexpr UInt8 __attribute__((__aligned__(32))) masks[] =
|
static constexpr UInt8 __attribute__((__aligned__(32))) masks[] =
|
||||||
{
|
{
|
||||||
0, 1, 2, 2, 4, 2, 2, 4, 8, 5, 2, 10, 8, 6, 4, 2, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, /* offset=0, shift amount index. */
|
0, 1, 2, 2, 4, 2, 2, 4, 8, 5, 2, 10, 8, 6, 4, 2, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, /* offset=0, shift amount index. */
|
||||||
@ -447,28 +447,25 @@ inline void copyOverlap32Shuffle(UInt8 * op, const UInt8 *& match, const size_t
|
|||||||
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 0,
|
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 0,
|
||||||
};
|
};
|
||||||
|
|
||||||
if (DB::Cpu::CpuFlagsCache::have_AVX512VBMI)
|
|
||||||
{
|
|
||||||
_mm256_storeu_si256(reinterpret_cast<__m256i *>(op),
|
_mm256_storeu_si256(reinterpret_cast<__m256i *>(op),
|
||||||
_mm256_permutexvar_epi8(
|
_mm256_permutexvar_epi8(
|
||||||
_mm256_load_si256(reinterpret_cast<const __m256i *>(masks) + offset),
|
_mm256_load_si256(reinterpret_cast<const __m256i *>(masks) + offset),
|
||||||
_mm256_loadu_si256(reinterpret_cast<const __m256i *>(match))));
|
_mm256_loadu_si256(reinterpret_cast<const __m256i *>(match))));
|
||||||
match += masks[offset];
|
match += masks[offset];
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
copyOverlap32(op, match, offset);
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
copyOverlap32(op, match, offset);
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
) /// DECLARE_AVX512VBMI_SPECIFIC_CODE
|
||||||
|
|
||||||
|
|
||||||
template <> void inline copy<32>(UInt8 * dst, const UInt8 * src) { copy32(dst, src); }
|
template <> void inline copy<32>(UInt8 * dst, const UInt8 * src) { copy32(dst, src); }
|
||||||
template <> void inline wildCopy<32>(UInt8 * dst, const UInt8 * src, UInt8 * dst_end) { wildCopy32(dst, src, dst_end); }
|
template <> void inline wildCopy<32>(UInt8 * dst, const UInt8 * src, UInt8 * dst_end) { wildCopy32(dst, src, dst_end); }
|
||||||
template <> void inline copyOverlap<32, false>(UInt8 * op, const UInt8 *& match, const size_t offset) { copyOverlap32(op, match, offset); }
|
template <> void inline copyOverlap<32, false>(UInt8 * op, const UInt8 *& match, const size_t offset) { copyOverlap32(op, match, offset); }
|
||||||
template <> void inline copyOverlap<32, true>(UInt8 * op, const UInt8 *& match, const size_t offset) { copyOverlap32Shuffle(op, match, offset); }
|
template <> void inline copyOverlap<32, true>(UInt8 * op, const UInt8 *& match, const size_t offset) {
|
||||||
|
#if USE_MULTITARGET_CODE
|
||||||
|
TargetSpecific::AVX512VBMI::copyOverlap32Shuffle(op, match, offset);
|
||||||
|
#else
|
||||||
|
copyOverlap32(op, match, offset);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/// See also https://stackoverflow.com/a/30669632
|
/// See also https://stackoverflow.com/a/30669632
|
||||||
@ -641,9 +638,9 @@ bool decompress(
|
|||||||
if (dest_size >= 32768)
|
if (dest_size >= 32768)
|
||||||
{
|
{
|
||||||
size_t variant_size = 4;
|
size_t variant_size = 4;
|
||||||
#if defined(__AVX512VBMI__) && !defined(MEMORY_SANITIZER)
|
#if USE_MULTITARGET_CODE && !defined(MEMORY_SANITIZER)
|
||||||
/// best_variant == 4 only valid when AVX512VBMI available
|
/// best_variant == 4 only valid when AVX512VBMI available
|
||||||
if (DB::Cpu::CpuFlagsCache::have_AVX512VBMI)
|
if (isArchSupported(DB::TargetArch::AVX512VBMI))
|
||||||
variant_size = 5;
|
variant_size = 5;
|
||||||
#endif
|
#endif
|
||||||
size_t best_variant = statistics.select(variant_size);
|
size_t best_variant = statistics.select(variant_size);
|
||||||
|
Loading…
Reference in New Issue
Block a user