mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-21 07:01:59 +00:00
Merge branch 'master' into mvcc_prototype
This commit is contained in:
commit
1fe50ad201
4
.github/PULL_REQUEST_TEMPLATE.md
vendored
4
.github/PULL_REQUEST_TEMPLATE.md
vendored
@ -1,4 +1,4 @@
|
||||
Changelog category (leave one):
|
||||
### Changelog category (leave one):
|
||||
- New Feature
|
||||
- Improvement
|
||||
- Bug Fix (user-visible misbehaviour in official stable or prestable release)
|
||||
@ -9,7 +9,7 @@ Changelog category (leave one):
|
||||
- Not for changelog (changelog entry is not required)
|
||||
|
||||
|
||||
Changelog entry (a user-readable short description of the changes that goes to CHANGELOG.md):
|
||||
### Changelog entry (a user-readable short description of the changes that goes to CHANGELOG.md):
|
||||
...
|
||||
|
||||
|
||||
|
2
contrib/unixodbc
vendored
2
contrib/unixodbc
vendored
@ -1 +1 @@
|
||||
Subproject commit b0ad30f7f6289c12b76f04bfb9d466374bb32168
|
||||
Subproject commit a2cd5395e8c7f7390025ec93af5bfebef3fb5fcd
|
@ -20,6 +20,8 @@ ENV LANG=en_US.UTF-8 \
|
||||
COPY --from=glibc-donor /lib/linux-gnu/libc.so.6 /lib/linux-gnu/libdl.so.2 /lib/linux-gnu/libm.so.6 /lib/linux-gnu/libpthread.so.0 /lib/linux-gnu/librt.so.1 /lib/linux-gnu/libnss_dns.so.2 /lib/linux-gnu/libnss_files.so.2 /lib/linux-gnu/libresolv.so.2 /lib/linux-gnu/ld-2.31.so /lib/
|
||||
COPY --from=glibc-donor /etc/nsswitch.conf /etc/
|
||||
COPY entrypoint.sh /entrypoint.sh
|
||||
|
||||
ARG TARGETARCH
|
||||
RUN arch=${TARGETARCH:-amd64} \
|
||||
&& case $arch in \
|
||||
amd64) mkdir -p /lib64 && ln -sf /lib/ld-2.31.so /lib64/ld-linux-x86-64.so.2 ;; \
|
||||
|
@ -810,7 +810,7 @@ void Client::addOptions(OptionsDescription & options_description)
|
||||
("quota_key", po::value<std::string>(), "A string to differentiate quotas when the user have keyed quotas configured on server")
|
||||
|
||||
("max_client_network_bandwidth", po::value<int>(), "the maximum speed of data exchange over the network for the client in bytes per second.")
|
||||
("compression", po::value<bool>(), "enable or disable compression")
|
||||
("compression", po::value<bool>(), "enable or disable compression (enabled by default for remote communication and disabled for localhost communication).")
|
||||
|
||||
("query-fuzzer-runs", po::value<int>()->default_value(0), "After executing every SELECT query, do random mutations in it and run again specified number of times. This is used for testing to discover unexpected corner cases.")
|
||||
("interleave-queries-file", po::value<std::vector<std::string>>()->multitoken(),
|
||||
|
@ -49,6 +49,18 @@ if (COMPILER_GCC)
|
||||
add_definitions ("-fno-tree-loop-distribute-patterns")
|
||||
endif ()
|
||||
|
||||
# ClickHouse developers may use platform-dependent code under some macro (e.g. `#if ENABLE_MULTITARGET_CODE`; the macro is always defined, to either 0 or 1).
|
||||
# If turned ON, this option defines such macro.
|
||||
# See `src/Common/TargetSpecific.h`
|
||||
option(ENABLE_MULTITARGET_CODE "Enable platform-dependent code" ON)
|
||||
|
||||
if (ENABLE_MULTITARGET_CODE)
|
||||
add_definitions(-DENABLE_MULTITARGET_CODE=1)
|
||||
else()
|
||||
add_definitions(-DENABLE_MULTITARGET_CODE=0)
|
||||
endif()
|
||||
|
||||
|
||||
add_subdirectory (Access)
|
||||
add_subdirectory (Backups)
|
||||
add_subdirectory (Columns)
|
||||
|
@ -1,4 +1,4 @@
|
||||
#include <Functions/TargetSpecific.h>
|
||||
#include <Common/TargetSpecific.h>
|
||||
|
||||
#include <Common/CpuId.h>
|
||||
|
@ -334,15 +334,17 @@ SeekableReadBufferPtr CachedReadBufferFromRemoteFS::getImplementationBuffer(File
|
||||
read_buffer_for_file_segment->seek(file_offset_of_buffer_end, SEEK_SET);
|
||||
}
|
||||
|
||||
auto impl_range = read_buffer_for_file_segment->getRemainingReadRange();
|
||||
auto download_offset = file_segment->getDownloadOffset();
|
||||
if (download_offset != static_cast<size_t>(read_buffer_for_file_segment->getPosition()))
|
||||
{
|
||||
auto impl_range = read_buffer_for_file_segment->getRemainingReadRange();
|
||||
throw Exception(
|
||||
ErrorCodes::LOGICAL_ERROR,
|
||||
"Buffer's offsets mismatch; cached buffer offset: {}, download_offset: {}, position: {}, implementation buffer offset: {}, "
|
||||
"implementation buffer reading until: {}, file segment info: {}",
|
||||
file_offset_of_buffer_end, download_offset, read_buffer_for_file_segment->getPosition(),
|
||||
impl_range.left, *impl_range.right, file_segment->getInfoForLog());
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
@ -802,12 +804,14 @@ std::optional<size_t> CachedReadBufferFromRemoteFS::getLastNonDownloadedOffset()
|
||||
|
||||
String CachedReadBufferFromRemoteFS::getInfoForLog()
|
||||
{
|
||||
auto implementation_buffer_read_range_str =
|
||||
implementation_buffer ?
|
||||
std::to_string(implementation_buffer->getRemainingReadRange().left)
|
||||
+ '-'
|
||||
+ (implementation_buffer->getRemainingReadRange().right ? std::to_string(*implementation_buffer->getRemainingReadRange().right) : "None")
|
||||
: "None";
|
||||
String implementation_buffer_read_range_str;
|
||||
if (implementation_buffer)
|
||||
{
|
||||
auto read_range = implementation_buffer->getRemainingReadRange();
|
||||
implementation_buffer_read_range_str = std::to_string(read_range.left) + '-' + (read_range.right ? std::to_string(*read_range.right) : "None");
|
||||
}
|
||||
else
|
||||
implementation_buffer_read_range_str = "None";
|
||||
|
||||
auto current_file_segment_info = current_file_segment_it == file_segments_holder->file_segments.end() ? "None" : (*current_file_segment_it)->getInfoForLog();
|
||||
|
||||
|
@ -96,17 +96,6 @@ if (TARGET ch_contrib::rapidjson)
|
||||
target_link_libraries(clickhouse_functions PRIVATE ch_contrib::rapidjson)
|
||||
endif()
|
||||
|
||||
# ClickHouse developers may use platform-dependent code under some macro (e.g. `#ifdef ENABLE_MULTITARGET`).
|
||||
# If turned ON, this option defines such macro.
|
||||
# See `src/Functions/TargetSpecific.h`
|
||||
option(ENABLE_MULTITARGET_CODE "Enable platform-dependent code" ON)
|
||||
|
||||
if (ENABLE_MULTITARGET_CODE)
|
||||
add_definitions(-DENABLE_MULTITARGET_CODE=1)
|
||||
else()
|
||||
add_definitions(-DENABLE_MULTITARGET_CODE=0)
|
||||
endif()
|
||||
|
||||
add_subdirectory(GatherUtils)
|
||||
target_link_libraries(clickhouse_functions PRIVATE clickhouse_functions_gatherutils)
|
||||
|
||||
|
@ -1,12 +1,12 @@
|
||||
#pragma once
|
||||
#include <base/map.h>
|
||||
|
||||
#include <Common/TargetSpecific.h>
|
||||
#include <Functions/FunctionHelpers.h>
|
||||
#include <Functions/GatherUtils/GatherUtils.h>
|
||||
#include <Functions/GatherUtils/Sources.h>
|
||||
#include <Functions/IFunction.h>
|
||||
#include <Functions/PerformanceAdaptors.h>
|
||||
#include <Functions/TargetSpecific.h>
|
||||
#include <DataTypes/DataTypeString.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <DataTypes/getLeastSupertype.h>
|
||||
|
@ -38,8 +38,8 @@
|
||||
#include <Columns/ColumnTuple.h>
|
||||
#include <Functions/IFunction.h>
|
||||
#include <Functions/FunctionHelpers.h>
|
||||
#include <Functions/TargetSpecific.h>
|
||||
#include <Functions/PerformanceAdaptors.h>
|
||||
#include <Common/TargetSpecific.h>
|
||||
#include <base/range.h>
|
||||
#include <base/bit_cast.h>
|
||||
|
||||
|
@ -1,9 +1,9 @@
|
||||
#pragma once
|
||||
|
||||
#include <Common/TargetSpecific.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <Columns/ColumnVector.h>
|
||||
#include <Functions/IFunction.h>
|
||||
#include <Functions/TargetSpecific.h>
|
||||
#include <Functions/PerformanceAdaptors.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
|
||||
|
@ -7,6 +7,8 @@
|
||||
#include <Core/AccurateComparison.h>
|
||||
#include <base/range.h>
|
||||
#include "GatherUtils.h"
|
||||
#include "sliceEqualElements.h"
|
||||
#include "sliceHasImplAnyAll.h"
|
||||
|
||||
|
||||
namespace DB::ErrorCodes
|
||||
@ -461,39 +463,19 @@ void NO_INLINE conditional(SourceA && src_a, SourceB && src_b, Sink && sink, con
|
||||
}
|
||||
|
||||
|
||||
/// Methods to check if first array has elements from second array, overloaded for various combinations of types.
|
||||
template <
|
||||
ArraySearchType search_type,
|
||||
typename FirstSliceType,
|
||||
typename SecondSliceType,
|
||||
bool (*isEqual)(const FirstSliceType &, const SecondSliceType &, size_t, size_t)>
|
||||
bool sliceHasImplAnyAll(const FirstSliceType & first, const SecondSliceType & second, const UInt8 * first_null_map, const UInt8 * second_null_map)
|
||||
template <typename T>
|
||||
bool insliceEqualElements(const NumericArraySlice<T> & first [[maybe_unused]],
|
||||
size_t first_ind [[maybe_unused]],
|
||||
size_t second_ind [[maybe_unused]])
|
||||
{
|
||||
const bool has_first_null_map = first_null_map != nullptr;
|
||||
const bool has_second_null_map = second_null_map != nullptr;
|
||||
|
||||
for (size_t i = 0; i < second.size; ++i)
|
||||
{
|
||||
bool has = false;
|
||||
for (size_t j = 0; j < first.size && !has; ++j)
|
||||
{
|
||||
const bool is_first_null = has_first_null_map && first_null_map[j];
|
||||
const bool is_second_null = has_second_null_map && second_null_map[i];
|
||||
|
||||
if (is_first_null && is_second_null)
|
||||
has = true;
|
||||
|
||||
if (!is_first_null && !is_second_null && isEqual(first, second, j, i))
|
||||
has = true;
|
||||
}
|
||||
|
||||
if (has && search_type == ArraySearchType::Any)
|
||||
return true;
|
||||
|
||||
if (!has && search_type == ArraySearchType::All)
|
||||
return false;
|
||||
}
|
||||
return search_type == ArraySearchType::All;
|
||||
if constexpr (is_decimal<T>)
|
||||
return accurate::equalsOp(first.data[first_ind].value, first.data[second_ind].value);
|
||||
else
|
||||
return accurate::equalsOp(first.data[first_ind], first.data[second_ind]);
|
||||
}
|
||||
/// Compares two elements of the same generic slice.
/// Both indices are relative to the slice start (`first.begin` is added to each).
/// Returns true when `compareAt` reports the two rows as equal.
inline ALWAYS_INLINE bool insliceEqualElements(const GenericArraySlice & first, size_t first_ind, size_t second_ind)
{
    const auto lhs_row = first.begin + first_ind;
    const auto rhs_row = first.begin + second_ind;
    const auto order = first.elements->compareAt(lhs_row, rhs_row, *first.elements, -1);
    return order == 0;
}
|
||||
|
||||
template <
|
||||
@ -620,55 +602,6 @@ bool sliceHasImpl(const FirstSliceType & first, const SecondSliceType & second,
|
||||
return sliceHasImplAnyAll<search_type, FirstSliceType, SecondSliceType, isEqual>(first, second, first_null_map, second_null_map);
|
||||
}
|
||||
|
||||
|
||||
template <typename T, typename U>
|
||||
bool sliceEqualElements(const NumericArraySlice<T> & first [[maybe_unused]],
|
||||
const NumericArraySlice<U> & second [[maybe_unused]],
|
||||
size_t first_ind [[maybe_unused]],
|
||||
size_t second_ind [[maybe_unused]])
|
||||
{
|
||||
/// TODO: Decimal scale
|
||||
if constexpr (is_decimal<T> && is_decimal<U>)
|
||||
return accurate::equalsOp(first.data[first_ind].value, second.data[second_ind].value);
|
||||
else if constexpr (is_decimal<T> || is_decimal<U>)
|
||||
return false;
|
||||
else
|
||||
return accurate::equalsOp(first.data[first_ind], second.data[second_ind]);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
bool sliceEqualElements(const NumericArraySlice<T> &, const GenericArraySlice &, size_t, size_t)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
template <typename U>
|
||||
bool sliceEqualElements(const GenericArraySlice &, const NumericArraySlice<U> &, size_t, size_t)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
/// Compares an element of one generic slice with an element of another.
/// Each index is relative to the start of its own slice (`begin` is added to it).
/// Returns true when `compareAt` reports the two rows as equal.
inline ALWAYS_INLINE bool sliceEqualElements(const GenericArraySlice & first, const GenericArraySlice & second, size_t first_ind, size_t second_ind)
{
    const auto lhs_row = first.begin + first_ind;
    const auto rhs_row = second.begin + second_ind;
    const auto order = first.elements->compareAt(lhs_row, rhs_row, *second.elements, -1);
    return order == 0;
}
|
||||
|
||||
template <typename T>
|
||||
bool insliceEqualElements(const NumericArraySlice<T> & first [[maybe_unused]],
|
||||
size_t first_ind [[maybe_unused]],
|
||||
size_t second_ind [[maybe_unused]])
|
||||
{
|
||||
if constexpr (is_decimal<T>)
|
||||
return accurate::equalsOp(first.data[first_ind].value, first.data[second_ind].value);
|
||||
else
|
||||
return accurate::equalsOp(first.data[first_ind], first.data[second_ind]);
|
||||
}
|
||||
|
||||
/// Compares two elements of the same generic slice.
/// Both indices are relative to the slice start (`first.begin` is added to each).
/// Returns true when `compareAt` reports the two rows as equal.
inline ALWAYS_INLINE bool insliceEqualElements(const GenericArraySlice & first, size_t first_ind, size_t second_ind)
{
    const auto lhs_row = first.begin + first_ind;
    const auto rhs_row = first.begin + second_ind;
    const auto order = first.elements->compareAt(lhs_row, rhs_row, *first.elements, -1);
    return order == 0;
}
|
||||
|
||||
template <ArraySearchType search_type, typename T, typename U>
|
||||
bool sliceHas(const NumericArraySlice<T> & first, const NumericArraySlice<U> & second)
|
||||
{
|
||||
@ -854,4 +787,3 @@ void resizeConstantSize(ArraySource && array_source, ValueSource && value_source
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
@ -1,4 +1,5 @@
|
||||
include("${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake")
|
||||
|
||||
add_headers_and_sources(clickhouse_functions_gatherutils .)
|
||||
add_library(clickhouse_functions_gatherutils ${clickhouse_functions_gatherutils_sources} ${clickhouse_functions_gatherutils_headers})
|
||||
target_link_libraries(clickhouse_functions_gatherutils PRIVATE dbms)
|
||||
@ -14,3 +15,5 @@ endif()
|
||||
if (STRIP_DEBUG_SYMBOLS_FUNCTIONS)
|
||||
target_compile_options(clickhouse_functions_gatherutils PRIVATE "-g0")
|
||||
endif()
|
||||
|
||||
set_target_properties(clickhouse_functions_gatherutils PROPERTIES COMPILE_FLAGS "${X86_INTRINSICS_FLAGS}")
|
||||
|
41
src/Functions/GatherUtils/sliceEqualElements.h
Normal file
41
src/Functions/GatherUtils/sliceEqualElements.h
Normal file
@ -0,0 +1,41 @@
|
||||
#pragma once
|
||||
|
||||
#include <Core/AccurateComparison.h>
|
||||
#include "Slices.h"
|
||||
|
||||
namespace DB::GatherUtils
|
||||
{
|
||||
|
||||
template <typename T, typename U>
|
||||
bool sliceEqualElements(const NumericArraySlice<T> & first [[maybe_unused]],
|
||||
const NumericArraySlice<U> & second [[maybe_unused]],
|
||||
size_t first_ind [[maybe_unused]],
|
||||
size_t second_ind [[maybe_unused]])
|
||||
{
|
||||
/// TODO: Decimal scale
|
||||
if constexpr (is_decimal<T> && is_decimal<U>)
|
||||
return accurate::equalsOp(first.data[first_ind].value, second.data[second_ind].value);
|
||||
else if constexpr (is_decimal<T> || is_decimal<U>)
|
||||
return false;
|
||||
else
|
||||
return accurate::equalsOp(first.data[first_ind], second.data[second_ind]);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
bool sliceEqualElements(const NumericArraySlice<T> &, const GenericArraySlice &, size_t, size_t)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
template <typename U>
|
||||
bool sliceEqualElements(const GenericArraySlice &, const NumericArraySlice<U> &, size_t, size_t)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
/// Compares an element of one generic slice with an element of another.
/// Each index is relative to the start of its own slice (`begin` is added to it).
/// Returns true when `compareAt` reports the two rows as equal.
inline ALWAYS_INLINE bool sliceEqualElements(const GenericArraySlice & first, const GenericArraySlice & second, size_t first_ind, size_t second_ind)
{
    const auto lhs_row = first.begin + first_ind;
    const auto rhs_row = second.begin + second_ind;
    const auto order = first.elements->compareAt(lhs_row, rhs_row, *second.elements, -1);
    return order == 0;
}
|
||||
|
||||
}
|
943
src/Functions/GatherUtils/sliceHasImplAnyAll.h
Normal file
943
src/Functions/GatherUtils/sliceHasImplAnyAll.h
Normal file
@ -0,0 +1,943 @@
|
||||
#pragma once
|
||||
|
||||
#include "GatherUtils.h"
|
||||
#include "Slices.h"
|
||||
#include "sliceEqualElements.h"
|
||||
|
||||
#if defined(__SSE4_2__)
|
||||
#include <emmintrin.h>
|
||||
#include <smmintrin.h>
|
||||
#include <nmmintrin.h>
|
||||
#endif
|
||||
|
||||
#if defined(__AVX2__)
|
||||
#include <immintrin.h>
|
||||
#endif
|
||||
|
||||
#include <Common/TargetSpecific.h>
|
||||
|
||||
|
||||
namespace DB::GatherUtils
|
||||
{
|
||||
|
||||
/// Returns true when at least one of the first `null_map_size` bytes of `null_map` is non-zero.
/// A missing null map (nullptr) means there are no nulls at all.
inline ALWAYS_INLINE bool hasNull(const UInt8 * null_map, size_t null_map_size)
{
    if (!null_map)
        return false;

    const UInt8 * const map_end = null_map + null_map_size;
    for (const UInt8 * flag = null_map; flag != map_end; ++flag)
        if (*flag)
            return true;

    return false;
}
|
||||
|
||||
template<class T>
|
||||
inline ALWAYS_INLINE bool hasAllIntegralLoopRemainder(
|
||||
size_t j, const NumericArraySlice<T> & first, const NumericArraySlice<T> & second, const UInt8 * first_null_map, const UInt8 * second_null_map)
|
||||
{
|
||||
const bool has_first_null_map = first_null_map != nullptr;
|
||||
const bool has_second_null_map = second_null_map != nullptr;
|
||||
|
||||
for (; j < second.size; ++j)
|
||||
{
|
||||
// skip null elements since both have at least one - assuming it was checked earlier that at least one element in 'first' is null
|
||||
if (has_second_null_map && second_null_map[j])
|
||||
continue;
|
||||
|
||||
bool found = false;
|
||||
|
||||
for (size_t i = 0; i < first.size; ++i)
|
||||
{
|
||||
if (has_first_null_map && first_null_map[i])
|
||||
continue;
|
||||
|
||||
if (first.data[i] == second.data[j])
|
||||
{
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!found)
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
#if defined(__AVX2__)
|
||||
|
||||
DECLARE_AVX2_SPECIFIC_CODE (
|
||||
|
||||
// AVX2 Int64, UInt64 specialization
|
||||
template<typename IntType>
|
||||
requires (std::is_same_v<IntType, Int64> || std::is_same_v<IntType, UInt64>)
|
||||
inline ALWAYS_INLINE bool sliceHasImplAnyAllImplInt64(
|
||||
const NumericArraySlice<IntType> & first,
|
||||
const NumericArraySlice<IntType> & second,
|
||||
const UInt8 * first_null_map,
|
||||
const UInt8 * second_null_map)
|
||||
{
|
||||
if (second.size == 0)
|
||||
return true;
|
||||
|
||||
if (!hasNull(first_null_map, first.size) && hasNull(second_null_map, second.size))
|
||||
return false;
|
||||
|
||||
const bool has_first_null_map = first_null_map != nullptr;
|
||||
const bool has_second_null_map = second_null_map != nullptr;
|
||||
|
||||
size_t j = 0;
|
||||
int has_mask = 1;
|
||||
static constexpr Int64 full = -1, none = 0;
|
||||
const __m256i ones = _mm256_set1_epi64x(full);
|
||||
const __m256i zeros = _mm256_setzero_si256();
|
||||
|
||||
if (second.size > 3 && first.size > 3)
|
||||
{
|
||||
for (; j < second.size - 3 && has_mask; j += 4)
|
||||
{
|
||||
has_mask = 0;
|
||||
const __m256i second_data = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(second.data + j));
|
||||
// bits of the bitmask are set to one if considered as null in the corresponding null map, 0 otherwise;
|
||||
__m256i bitmask = has_second_null_map ?
|
||||
_mm256_set_epi64x(
|
||||
(second_null_map[j + 3])? full : none,
|
||||
(second_null_map[j + 2])? full : none,
|
||||
(second_null_map[j + 1])? full : none,
|
||||
(second_null_map[j]) ? full : none)
|
||||
: zeros;
|
||||
|
||||
size_t i = 0;
|
||||
for (; i < first.size - 3 && !has_mask; has_mask = _mm256_testc_si256(bitmask, ones), i += 4)
|
||||
{
|
||||
const __m256i first_data = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(first.data + i));
|
||||
const __m256i first_nm_mask = has_first_null_map?
|
||||
_mm256_set_m128i(
|
||||
_mm_cvtepi8_epi64(_mm_loadu_si128(reinterpret_cast<const __m128i *>(first_null_map + i + 2))),
|
||||
_mm_cvtepi8_epi64(_mm_loadu_si128(reinterpret_cast<const __m128i *>(first_null_map + i))))
|
||||
: zeros;
|
||||
bitmask =
|
||||
_mm256_or_si256(
|
||||
_mm256_or_si256(
|
||||
_mm256_or_si256(
|
||||
_mm256_andnot_si256(
|
||||
first_nm_mask,
|
||||
_mm256_cmpeq_epi64(second_data, first_data)),
|
||||
_mm256_andnot_si256(
|
||||
_mm256_permutevar8x32_epi32(first_nm_mask, _mm256_set_epi32(5,4,3,2,1,0,7,6)),
|
||||
_mm256_cmpeq_epi64(second_data, _mm256_permutevar8x32_epi32(first_data, _mm256_set_epi32(5,4,3,2,1,0,7,6))))),
|
||||
|
||||
_mm256_or_si256(
|
||||
_mm256_andnot_si256(
|
||||
_mm256_permutevar8x32_epi32(first_nm_mask, _mm256_set_epi32(3,2,1,0,7,6,5,4)),
|
||||
_mm256_cmpeq_epi64(second_data, _mm256_permutevar8x32_epi32(first_data, _mm256_set_epi32(3,2,1,0,7,6,5,4)))),
|
||||
_mm256_andnot_si256(
|
||||
_mm256_permutevar8x32_epi32(first_nm_mask, _mm256_set_epi32(1,0,7,6,5,4,3,2)),
|
||||
_mm256_cmpeq_epi64(second_data, _mm256_permutevar8x32_epi32(first_data, _mm256_set_epi32(1,0,7,6,5,4,3,2)))))),
|
||||
bitmask);
|
||||
}
|
||||
|
||||
if (i < first.size)
|
||||
{
|
||||
for (; i < first.size && !has_mask; ++i)
|
||||
{
|
||||
if (has_first_null_map && first_null_map[i])
|
||||
continue;
|
||||
|
||||
__m256i v_i = _mm256_set1_epi64x(first.data[i]);
|
||||
bitmask = _mm256_or_si256(bitmask, _mm256_cmpeq_epi64(second_data, v_i));
|
||||
has_mask = _mm256_testc_si256(bitmask, ones);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!has_mask && second.size > 3)
|
||||
return false;
|
||||
|
||||
return hasAllIntegralLoopRemainder(j, first, second, first_null_map, second_null_map);
|
||||
}
|
||||
|
||||
// AVX2 Int32, UInt32 specialization
|
||||
template<typename IntType>
|
||||
requires (std::is_same_v<IntType, Int32> || std::is_same_v<IntType, UInt32>)
|
||||
inline ALWAYS_INLINE bool sliceHasImplAnyAllImplInt32(
|
||||
const NumericArraySlice<IntType> & first,
|
||||
const NumericArraySlice<IntType> & second,
|
||||
const UInt8 * first_null_map,
|
||||
const UInt8 * second_null_map)
|
||||
{
|
||||
if (second.size == 0)
|
||||
return true;
|
||||
|
||||
if (!hasNull(first_null_map, first.size) && hasNull(second_null_map, second.size))
|
||||
return false;
|
||||
|
||||
const bool has_first_null_map = first_null_map != nullptr;
|
||||
const bool has_second_null_map = second_null_map != nullptr;
|
||||
|
||||
size_t j = 0;
|
||||
int has_mask = 1;
|
||||
static constexpr int full = -1, none = 0;
|
||||
|
||||
const __m256i ones = _mm256_set1_epi32(full);
|
||||
const __m256i zeros = _mm256_setzero_si256();
|
||||
|
||||
if (second.size > 7 && first.size > 7)
|
||||
{
|
||||
for (; j < second.size - 7 && has_mask; j += 8)
|
||||
{
|
||||
has_mask = 0;
|
||||
const __m256i second_data = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(second.data + j));
|
||||
// bits of the bitmask are set to one if considered as null in the corresponding null map, 0 otherwise;
|
||||
__m256i bitmask = has_second_null_map ?
|
||||
_mm256_set_epi32(
|
||||
(second_null_map[j + 7]) ? full : none,
|
||||
(second_null_map[j + 6]) ? full : none,
|
||||
(second_null_map[j + 5]) ? full : none,
|
||||
(second_null_map[j + 4]) ? full : none,
|
||||
(second_null_map[j + 3]) ? full : none,
|
||||
(second_null_map[j + 2]) ? full : none,
|
||||
(second_null_map[j + 1]) ? full : none,
|
||||
(second_null_map[j]) ? full : none)
|
||||
: zeros;
|
||||
|
||||
size_t i = 0;
|
||||
for (; i < first.size - 7 && !has_mask; has_mask = _mm256_testc_si256(bitmask, ones), i += 8)
|
||||
{
|
||||
const __m256i first_data = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(first.data + i));
|
||||
// Create a mask to avoid to compare null elements
|
||||
// set_m128i takes two arguments: (high segment, low segment) that are two __m128i convert from 8bits to 32bits to match with next operations
|
||||
const __m256i first_nm_mask = has_first_null_map?
|
||||
_mm256_set_m128i(
|
||||
_mm_cvtepi8_epi32(_mm_loadu_si128(reinterpret_cast<const __m128i *>(first_null_map + i + 4))),
|
||||
_mm_cvtepi8_epi32(_mm_loadu_si128(reinterpret_cast<const __m128i *>(first_null_map + i))))
|
||||
: zeros;
|
||||
bitmask =
|
||||
_mm256_or_si256(
|
||||
_mm256_or_si256(
|
||||
_mm256_or_si256(
|
||||
_mm256_or_si256(
|
||||
_mm256_andnot_si256(
|
||||
first_nm_mask,
|
||||
_mm256_cmpeq_epi32(second_data, first_data)),
|
||||
_mm256_andnot_si256(
|
||||
_mm256_permutevar8x32_epi32(first_nm_mask, _mm256_set_epi32(6,5,4,3,2,1,0,7)),
|
||||
_mm256_cmpeq_epi32(second_data, _mm256_permutevar8x32_epi32(first_data, _mm256_set_epi32(6,5,4,3,2,1,0,7))))),
|
||||
_mm256_or_si256(
|
||||
_mm256_andnot_si256(
|
||||
_mm256_permutevar8x32_epi32(first_nm_mask, _mm256_set_epi32(5,4,3,2,1,0,7,6)),
|
||||
_mm256_cmpeq_epi32(second_data, _mm256_permutevar8x32_epi32(first_data, _mm256_set_epi32(5,4,3,2,1,0,7,6)))),
|
||||
_mm256_andnot_si256(
|
||||
_mm256_permutevar8x32_epi32(first_nm_mask, _mm256_set_epi32(4,3,2,1,0,7,6,5)),
|
||||
_mm256_cmpeq_epi32(second_data, _mm256_permutevar8x32_epi32(first_data, _mm256_set_epi32(4,3,2,1,0,7,6,5)))))
|
||||
),
|
||||
_mm256_or_si256(
|
||||
_mm256_or_si256(
|
||||
_mm256_andnot_si256(
|
||||
_mm256_permutevar8x32_epi32(first_nm_mask, _mm256_set_epi32(3,2,1,0,7,6,5,4)),
|
||||
_mm256_cmpeq_epi32(second_data, _mm256_permutevar8x32_epi32(first_data, _mm256_set_epi32(3,2,1,0,7,6,5,4)))),
|
||||
_mm256_andnot_si256(
|
||||
_mm256_permutevar8x32_epi32(first_nm_mask, _mm256_set_epi32(2,1,0,7,6,5,4,3)),
|
||||
_mm256_cmpeq_epi32(second_data, _mm256_permutevar8x32_epi32(first_data, _mm256_set_epi32(2,1,0,7,6,5,4,3))))),
|
||||
_mm256_or_si256(
|
||||
_mm256_andnot_si256(
|
||||
_mm256_permutevar8x32_epi32(first_nm_mask, _mm256_set_epi32(1,0,7,6,5,4,3,2)),
|
||||
_mm256_cmpeq_epi32(second_data, _mm256_permutevar8x32_epi32(first_data, _mm256_set_epi32(1,0,7,6,5,4,3,2)))),
|
||||
_mm256_andnot_si256(
|
||||
_mm256_permutevar8x32_epi32(first_nm_mask, _mm256_set_epi32(0,7,6,5,4,3,2,1)),
|
||||
_mm256_cmpeq_epi32(second_data, _mm256_permutevar8x32_epi32(first_data, _mm256_set_epi32(0,7,6,5,4,3,2,1))))))),
|
||||
bitmask);
|
||||
}
|
||||
|
||||
if (i < first.size)
|
||||
{
|
||||
for (; i < first.size && !has_mask; ++i)
|
||||
{
|
||||
if (has_first_null_map && first_null_map[i])
|
||||
continue;
|
||||
|
||||
__m256i v_i = _mm256_set1_epi32(first.data[i]);
|
||||
bitmask = _mm256_or_si256(bitmask, _mm256_cmpeq_epi32(second_data, v_i));
|
||||
has_mask = _mm256_testc_si256(bitmask, ones);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!has_mask && second.size > 7)
|
||||
return false;
|
||||
|
||||
return hasAllIntegralLoopRemainder(j, first, second, first_null_map, second_null_map);
|
||||
}
|
||||
|
||||
// AVX2 Int16, UInt16 specialization
|
||||
template<typename IntType>
|
||||
requires (std::is_same_v<IntType, Int16> || std::is_same_v<IntType, UInt16>)
|
||||
inline ALWAYS_INLINE bool sliceHasImplAnyAllImplInt16(
|
||||
const NumericArraySlice<IntType> & first,
|
||||
const NumericArraySlice<IntType> & second,
|
||||
const UInt8 * first_null_map,
|
||||
const UInt8 * second_null_map)
|
||||
{
|
||||
if (second.size == 0)
|
||||
return true;
|
||||
|
||||
if (!hasNull(first_null_map, first.size) && hasNull(second_null_map, second.size))
|
||||
return false;
|
||||
|
||||
const bool has_first_null_map = first_null_map != nullptr;
|
||||
const bool has_second_null_map = second_null_map != nullptr;
|
||||
|
||||
size_t j = 0;
|
||||
int has_mask = 1;
|
||||
static constexpr int16_t full = -1, none = 0;
|
||||
const __m256i ones = _mm256_set1_epi16(full);
|
||||
const __m256i zeros = _mm256_setzero_si256();
|
||||
if (second.size > 15 && first.size > 15)
|
||||
{
|
||||
for (; j < second.size - 15 && has_mask; j += 16)
|
||||
{
|
||||
has_mask = 0;
|
||||
const __m256i second_data = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(second.data + j));
|
||||
__m256i bitmask = has_second_null_map ?
|
||||
_mm256_set_epi16(
|
||||
(second_null_map[j + 15]) ? full : none, (second_null_map[j + 14]) ? full : none,
|
||||
(second_null_map[j + 13]) ? full : none, (second_null_map[j + 12]) ? full : none,
|
||||
(second_null_map[j + 11]) ? full : none, (second_null_map[j + 10]) ? full : none,
|
||||
(second_null_map[j + 9]) ? full : none, (second_null_map[j + 8])? full : none,
|
||||
(second_null_map[j + 7]) ? full : none, (second_null_map[j + 6])? full : none,
|
||||
(second_null_map[j + 5]) ? full : none, (second_null_map[j + 4])? full : none,
|
||||
(second_null_map[j + 3]) ? full : none, (second_null_map[j + 2])? full : none,
|
||||
(second_null_map[j + 1]) ? full : none, (second_null_map[j]) ? full : none)
|
||||
: zeros;
|
||||
|
||||
size_t i = 0;
|
||||
for (; i < first.size - 15 && !has_mask; has_mask = _mm256_testc_si256(bitmask, ones), i += 16)
|
||||
{
|
||||
const __m256i first_data = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(first.data + i));
|
||||
const __m256i first_nm_mask = has_first_null_map?
|
||||
_mm256_set_m128i(
|
||||
_mm_cvtepi8_epi16(_mm_loadu_si128(reinterpret_cast<const __m128i *>(first_null_map + i + 8))),
|
||||
_mm_cvtepi8_epi16(_mm_loadu_si128(reinterpret_cast<const __m128i *>(first_null_map + i))))
|
||||
: zeros;
|
||||
|
||||
bitmask =
|
||||
_mm256_or_si256(
|
||||
_mm256_or_si256(
|
||||
_mm256_or_si256(
|
||||
_mm256_or_si256(
|
||||
_mm256_or_si256(
|
||||
_mm256_andnot_si256(
|
||||
first_nm_mask,
|
||||
_mm256_cmpeq_epi16(second_data, first_data)),
|
||||
_mm256_andnot_si256(
|
||||
_mm256_shuffle_epi8(first_nm_mask, _mm256_set_epi8(29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,31,30)),
|
||||
_mm256_cmpeq_epi16(second_data, _mm256_shuffle_epi8(first_data, _mm256_set_epi8(29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,31,30))))),
|
||||
_mm256_or_si256(
|
||||
_mm256_andnot_si256(
|
||||
_mm256_shuffle_epi8(first_nm_mask, _mm256_set_epi8(27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,31,30,29,28)),
|
||||
_mm256_cmpeq_epi16(second_data, _mm256_shuffle_epi8(first_data, _mm256_set_epi8(27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,31,30,29,28)))),
|
||||
_mm256_andnot_si256(
|
||||
_mm256_shuffle_epi8(first_nm_mask, _mm256_set_epi8(25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,31,30,29,28,27,26)),
|
||||
_mm256_cmpeq_epi16(second_data, _mm256_shuffle_epi8(first_data, _mm256_set_epi8(25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,31,30,29,28,27,26)))))
|
||||
),
|
||||
_mm256_or_si256(
|
||||
_mm256_or_si256(
|
||||
_mm256_andnot_si256(
|
||||
_mm256_shuffle_epi8(first_nm_mask, _mm256_set_epi8(23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,31,30,29,28,27,26,25,24)),
|
||||
_mm256_cmpeq_epi16(second_data, _mm256_shuffle_epi8(first_data, _mm256_set_epi8(23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,31,30,29,28,27,26,25,24)))),
|
||||
_mm256_andnot_si256(
|
||||
_mm256_shuffle_epi8(first_nm_mask, _mm256_set_epi8(21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,31,30,29,28,27,26,25,24,23,22)),
|
||||
_mm256_cmpeq_epi16(second_data, _mm256_shuffle_epi8(first_data, _mm256_set_epi8(21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,31,30,29,28,27,26,25,24,23,22))))),
|
||||
_mm256_or_si256(
|
||||
_mm256_andnot_si256(
|
||||
_mm256_shuffle_epi8(first_nm_mask, _mm256_set_epi8(19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,31,30,29,28,27,26,25,24,23,22,21,20)),
|
||||
_mm256_cmpeq_epi16(second_data, _mm256_shuffle_epi8(first_data, _mm256_set_epi8(19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,31,30,29,28,27,26,25,24,23,22,21,20)))),
|
||||
_mm256_andnot_si256(
|
||||
_mm256_shuffle_epi8(first_nm_mask, _mm256_set_epi8(17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,31,30,29,28,27,26,25,24,23,22,21,20,19,18)),
|
||||
_mm256_cmpeq_epi16(second_data, _mm256_shuffle_epi8(first_data, _mm256_set_epi8(17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,31,30,29,28,27,26,25,24,23,22,21,20,19,18))))))
|
||||
),
|
||||
_mm256_or_si256(
|
||||
_mm256_or_si256(
|
||||
_mm256_or_si256(
|
||||
_mm256_andnot_si256(
|
||||
_mm256_permute2x128_si256(first_nm_mask, first_nm_mask,1),
|
||||
_mm256_cmpeq_epi16(second_data, _mm256_permute2x128_si256(first_data, first_data, 1))),
|
||||
_mm256_andnot_si256(
|
||||
_mm256_shuffle_epi8(_mm256_permute2x128_si256(first_nm_mask, first_nm_mask, 1), _mm256_set_epi8(13,12,11,10,9,8,7,6,5,4,3,2,1,0,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14)),
|
||||
_mm256_cmpeq_epi16(second_data, _mm256_shuffle_epi8(_mm256_permute2x128_si256(first_data, first_data, 1), _mm256_set_epi8(13,12,11,10,9,8,7,6,5,4,3,2,1,0,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14))))),
|
||||
_mm256_or_si256(
|
||||
_mm256_andnot_si256(
|
||||
_mm256_shuffle_epi8(_mm256_permute2x128_si256(first_nm_mask, first_nm_mask, 1), _mm256_set_epi8(11,10,9,8,7,6,5,4,3,2,1,0,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12)),
|
||||
_mm256_cmpeq_epi16(second_data, _mm256_shuffle_epi8(_mm256_permute2x128_si256(first_data, first_data, 1), _mm256_set_epi8(11,10,9,8,7,6,5,4,3,2,1,0,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12)))),
|
||||
_mm256_andnot_si256(
|
||||
_mm256_shuffle_epi8(_mm256_permute2x128_si256(first_nm_mask, first_nm_mask, 1), _mm256_set_epi8(9,8,7,6,5,4,3,2,1,0,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10)),
|
||||
_mm256_cmpeq_epi16(second_data, _mm256_shuffle_epi8(_mm256_permute2x128_si256(first_data, first_data, 1), _mm256_set_epi8(9,8,7,6,5,4,3,2,1,0,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10)))))
|
||||
),
|
||||
_mm256_or_si256(
|
||||
_mm256_or_si256(
|
||||
_mm256_andnot_si256(
|
||||
_mm256_shuffle_epi8(_mm256_permute2x128_si256(first_nm_mask, first_nm_mask, 1), _mm256_set_epi8(7,6,5,4,3,2,1,0,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8)),
|
||||
_mm256_cmpeq_epi16(second_data, _mm256_shuffle_epi8(_mm256_permute2x128_si256(first_data ,first_data, 1), _mm256_set_epi8(7,6,5,4,3,2,1,0,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8)))),
|
||||
_mm256_andnot_si256(
|
||||
_mm256_shuffle_epi8(_mm256_permute2x128_si256(first_nm_mask, first_nm_mask, 1), _mm256_set_epi8(5,4,3,2,1,0,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6)),
|
||||
_mm256_cmpeq_epi16(second_data, _mm256_shuffle_epi8(_mm256_permute2x128_si256(first_data, first_data, 1), _mm256_set_epi8(5,4,3,2,1,0,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6))))),
|
||||
_mm256_or_si256(
|
||||
_mm256_andnot_si256(
|
||||
_mm256_shuffle_epi8(_mm256_permute2x128_si256(first_nm_mask, first_nm_mask, 1), _mm256_set_epi8(3,2,1,0,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4)),
|
||||
_mm256_cmpeq_epi16(second_data, _mm256_shuffle_epi8(_mm256_permute2x128_si256(first_data ,first_data ,1), _mm256_set_epi8(3,2,1,0,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4)))),
|
||||
_mm256_andnot_si256(
|
||||
_mm256_shuffle_epi8(_mm256_permute2x128_si256(first_nm_mask, first_nm_mask, 1), _mm256_set_epi8(1,0,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2)),
|
||||
_mm256_cmpeq_epi16(second_data, _mm256_shuffle_epi8(_mm256_permute2x128_si256(first_data, first_data, 1), _mm256_set_epi8(1,0,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2))))))
|
||||
)
|
||||
),
|
||||
bitmask);
|
||||
}
|
||||
|
||||
if (i < first.size)
|
||||
{
|
||||
for (; i < first.size && !has_mask; ++i)
|
||||
{
|
||||
if (has_first_null_map && first_null_map[i])
|
||||
continue;
|
||||
|
||||
__m256i v_i = _mm256_set1_epi16(first.data[i]);
|
||||
bitmask = _mm256_or_si256(bitmask, _mm256_cmpeq_epi16(second_data, v_i));
|
||||
has_mask = _mm256_testc_si256(bitmask, ones);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!has_mask && second.size > 15)
|
||||
return false;
|
||||
|
||||
return hasAllIntegralLoopRemainder(j, first, second, first_null_map, second_null_map);
|
||||
}
|
||||
|
||||
)
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(__SSE4_2__)
|
||||
|
||||
DECLARE_SSE42_SPECIFIC_CODE (
|
||||
|
||||
// SSE4.2 Int64, UInt64 specialization
|
||||
template<typename IntType>
|
||||
requires (std::is_same_v<IntType, Int64> || std::is_same_v<IntType, UInt64>)
|
||||
inline ALWAYS_INLINE bool sliceHasImplAnyAllImplInt64(
|
||||
const NumericArraySlice<IntType> & first,
|
||||
const NumericArraySlice<IntType> & second,
|
||||
const UInt8 * first_null_map,
|
||||
const UInt8 * second_null_map)
|
||||
{
|
||||
if (second.size == 0)
|
||||
return true;
|
||||
|
||||
if (!hasNull(first_null_map, first.size) && hasNull(second_null_map, second.size))
|
||||
return false;
|
||||
|
||||
const bool has_first_null_map = first_null_map != nullptr;
|
||||
const bool has_second_null_map = second_null_map != nullptr;
|
||||
|
||||
size_t j = 0;
|
||||
int has_mask = 1;
|
||||
static constexpr Int64 full = -1, none = 0;
|
||||
const __m128i zeros = _mm_setzero_si128();
|
||||
if (second.size > 1 && first.size > 1)
|
||||
{
|
||||
for (; j < second.size - 1 && has_mask; j += 2)
|
||||
{
|
||||
has_mask = 0;
|
||||
const __m128i second_data = _mm_loadu_si128(reinterpret_cast<const __m128i *>(second.data + j));
|
||||
__m128i bitmask = has_second_null_map ?
|
||||
_mm_set_epi64x(
|
||||
(second_null_map[j + 1]) ? full : none,
|
||||
(second_null_map[j]) ? full : none)
|
||||
: zeros;
|
||||
|
||||
size_t i = 0;
|
||||
|
||||
for (; i < first.size - 1 && !has_mask; has_mask = _mm_test_all_ones(bitmask), i += 2)
|
||||
{
|
||||
const __m128i first_data = _mm_loadu_si128(reinterpret_cast<const __m128i *>(first.data + i));
|
||||
const __m128i first_nm_mask = has_first_null_map ?
|
||||
_mm_cvtepi8_epi64(_mm_loadu_si128(reinterpret_cast<const __m128i *>(first_null_map + i)))
|
||||
: zeros;
|
||||
|
||||
bitmask =
|
||||
_mm_or_si128(
|
||||
_mm_or_si128(
|
||||
_mm_andnot_si128(
|
||||
first_nm_mask,
|
||||
_mm_cmpeq_epi64(second_data, first_data)),
|
||||
_mm_andnot_si128(
|
||||
_mm_shuffle_epi32(first_nm_mask, _MM_SHUFFLE(1,0,3,2)),
|
||||
_mm_cmpeq_epi64(second_data, _mm_shuffle_epi32(first_data, _MM_SHUFFLE(1,0,3,2))))),
|
||||
bitmask);
|
||||
}
|
||||
|
||||
if (i < first.size)
|
||||
{
|
||||
for (; i < first.size && !has_mask; ++i)
|
||||
{
|
||||
if (has_first_null_map && first_null_map[i])
|
||||
continue;
|
||||
|
||||
__m128i v_i = _mm_set1_epi64x(first.data[i]);
|
||||
bitmask = _mm_or_si128(bitmask, _mm_cmpeq_epi64(second_data, v_i));
|
||||
has_mask = _mm_test_all_ones(bitmask);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!has_mask && second.size > 1)
|
||||
return false;
|
||||
|
||||
return hasAllIntegralLoopRemainder(j, first, second, first_null_map, second_null_map);
|
||||
}
|
||||
|
||||
// SSE4.2 Int32, UInt32 specialization
|
||||
template<typename IntType>
|
||||
requires (std::is_same_v<IntType, Int32> || std::is_same_v<IntType, UInt32>)
|
||||
inline ALWAYS_INLINE bool sliceHasImplAnyAllImplInt32(
|
||||
const NumericArraySlice<IntType> & first,
|
||||
const NumericArraySlice<IntType> & second,
|
||||
const UInt8 * first_null_map,
|
||||
const UInt8 * second_null_map)
|
||||
{
|
||||
if (second.size == 0)
|
||||
return true;
|
||||
|
||||
if (!hasNull(first_null_map, first.size) && hasNull(second_null_map, second.size))
|
||||
return false;
|
||||
|
||||
const bool has_first_null_map = first_null_map != nullptr;
|
||||
const bool has_second_null_map = second_null_map != nullptr;
|
||||
|
||||
size_t j = 0;
|
||||
int has_mask = 1;
|
||||
static constexpr int full = -1, none = 0;
|
||||
const __m128i zeros = _mm_setzero_si128();
|
||||
if (second.size > 3 && first.size > 3)
|
||||
{
|
||||
for (; j < second.size - 3 && has_mask; j += 4)
|
||||
{
|
||||
has_mask = 0;
|
||||
const __m128i second_data = _mm_loadu_si128(reinterpret_cast<const __m128i *>(second.data + j));
|
||||
__m128i bitmask = has_second_null_map ?
|
||||
_mm_set_epi32(
|
||||
(second_null_map[j + 3]) ? full : none,
|
||||
(second_null_map[j + 2]) ? full : none,
|
||||
(second_null_map[j + 1]) ? full : none,
|
||||
(second_null_map[j]) ? full : none)
|
||||
: zeros;
|
||||
|
||||
size_t i = 0;
|
||||
for (; i < first.size - 3 && !has_mask; has_mask = _mm_test_all_ones(bitmask), i += 4)
|
||||
{
|
||||
const __m128i first_data = _mm_loadu_si128(reinterpret_cast<const __m128i *>(first.data + i));
|
||||
const __m128i first_nm_mask = has_first_null_map ?
|
||||
_mm_cvtepi8_epi32(_mm_loadu_si128(reinterpret_cast<const __m128i *>(first_null_map + i)))
|
||||
: zeros;
|
||||
|
||||
bitmask =
|
||||
_mm_or_si128(
|
||||
_mm_or_si128(
|
||||
_mm_or_si128(
|
||||
_mm_andnot_si128(
|
||||
first_nm_mask,
|
||||
_mm_cmpeq_epi32(second_data, first_data)),
|
||||
_mm_andnot_si128(
|
||||
_mm_shuffle_epi32(first_nm_mask, _MM_SHUFFLE(2,1,0,3)),
|
||||
_mm_cmpeq_epi32(second_data, _mm_shuffle_epi32(first_data, _MM_SHUFFLE(2,1,0,3))))),
|
||||
_mm_or_si128(
|
||||
_mm_andnot_si128(
|
||||
_mm_shuffle_epi32(first_nm_mask, _MM_SHUFFLE(1,0,3,2)),
|
||||
_mm_cmpeq_epi32(second_data, _mm_shuffle_epi32(first_data, _MM_SHUFFLE(1,0,3,2)))),
|
||||
_mm_andnot_si128(
|
||||
_mm_shuffle_epi32(first_nm_mask, _MM_SHUFFLE(0,3,2,1)),
|
||||
_mm_cmpeq_epi32(second_data, _mm_shuffle_epi32(first_data, _MM_SHUFFLE(0,3,2,1)))))
|
||||
),
|
||||
bitmask);
|
||||
}
|
||||
|
||||
if (i < first.size)
|
||||
{
|
||||
for (; i < first.size && !has_mask; ++i)
|
||||
{
|
||||
if (has_first_null_map && first_null_map[i])
|
||||
continue;
|
||||
__m128i r_i = _mm_set1_epi32(first.data[i]);
|
||||
bitmask = _mm_or_si128(bitmask, _mm_cmpeq_epi32(second_data, r_i));
|
||||
has_mask = _mm_test_all_ones(bitmask);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!has_mask && second.size > 3)
|
||||
return false;
|
||||
|
||||
return hasAllIntegralLoopRemainder(j, first, second, first_null_map, second_null_map);
|
||||
}
|
||||
|
||||
// SSE4.2 Int16, UInt16 specialization
|
||||
template<typename IntType>
|
||||
requires (std::is_same_v<IntType, Int16> || std::is_same_v<IntType, UInt16>)
|
||||
inline ALWAYS_INLINE bool sliceHasImplAnyAllImplInt16(
|
||||
const NumericArraySlice<IntType> & first,
|
||||
const NumericArraySlice<IntType> & second,
|
||||
const UInt8 * first_null_map,
|
||||
const UInt8 * second_null_map)
|
||||
{
|
||||
if (second.size == 0)
|
||||
return true;
|
||||
|
||||
if (!hasNull(first_null_map, first.size) && hasNull(second_null_map, second.size))
|
||||
return false;
|
||||
|
||||
const bool has_first_null_map = first_null_map != nullptr;
|
||||
const bool has_second_null_map = second_null_map != nullptr;
|
||||
|
||||
size_t j = 0;
|
||||
int has_mask = 1;
|
||||
static constexpr int16_t full = -1, none = 0;
|
||||
const __m128i zeros = _mm_setzero_si128();
|
||||
if (second.size > 6 && first.size > 6)
|
||||
{
|
||||
for (; j < second.size - 7 && has_mask; j += 8)
|
||||
{
|
||||
has_mask = 0;
|
||||
const __m128i second_data = _mm_loadu_si128(reinterpret_cast<const __m128i *>(second.data + j));
|
||||
__m128i bitmask = has_second_null_map ?
|
||||
_mm_set_epi16(
|
||||
(second_null_map[j + 7]) ? full : none, (second_null_map[j + 6]) ? full : none,
|
||||
(second_null_map[j + 5]) ? full : none, (second_null_map[j + 4]) ? full : none,
|
||||
(second_null_map[j + 3]) ? full : none, (second_null_map[j + 2]) ? full : none,
|
||||
(second_null_map[j + 1]) ? full : none, (second_null_map[j]) ? full: none)
|
||||
: zeros;
|
||||
|
||||
size_t i = 0;
|
||||
for (; i < first.size-7 && !has_mask; has_mask = _mm_test_all_ones(bitmask), i += 8)
|
||||
{
|
||||
const __m128i first_data = _mm_loadu_si128(reinterpret_cast<const __m128i *>(first.data + i));
|
||||
const __m128i first_nm_mask = has_first_null_map ?
|
||||
_mm_cvtepi8_epi16(_mm_loadu_si128(reinterpret_cast<const __m128i *>(first_null_map + i)))
|
||||
: zeros;
|
||||
bitmask =
|
||||
_mm_or_si128(
|
||||
_mm_or_si128(
|
||||
_mm_or_si128(
|
||||
_mm_or_si128(
|
||||
_mm_andnot_si128(
|
||||
first_nm_mask,
|
||||
_mm_cmpeq_epi16(second_data, first_data)),
|
||||
_mm_andnot_si128(
|
||||
_mm_shuffle_epi8(first_nm_mask, _mm_set_epi8(13,12,11,10,9,8,7,6,5,4,3,2,1,0,15,14)),
|
||||
_mm_cmpeq_epi16(second_data, _mm_shuffle_epi8(first_data, _mm_set_epi8(13,12,11,10,9,8,7,6,5,4,3,2,1,0,15,14))))),
|
||||
_mm_or_si128(
|
||||
_mm_andnot_si128(
|
||||
_mm_shuffle_epi8(first_nm_mask, _mm_set_epi8(11,10,9,8,7,6,5,4,3,2,1,0,15,14,13,12)),
|
||||
_mm_cmpeq_epi16(second_data, _mm_shuffle_epi8(first_data, _mm_set_epi8(11,10,9,8,7,6,5,4,3,2,1,0,15,14,13,12)))),
|
||||
_mm_andnot_si128(
|
||||
_mm_shuffle_epi8(first_nm_mask, _mm_set_epi8(9,8,7,6,5,4,3,2,1,0,15,14,13,12,11,10)),
|
||||
_mm_cmpeq_epi16(second_data, _mm_shuffle_epi8(first_data, _mm_set_epi8(9,8,7,6,5,4,3,2,1,0,15,14,13,12,11,10)))))
|
||||
),
|
||||
_mm_or_si128(
|
||||
_mm_or_si128(
|
||||
_mm_andnot_si128(
|
||||
_mm_shuffle_epi8(first_nm_mask, _mm_set_epi8(7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8)),
|
||||
_mm_cmpeq_epi16(second_data, _mm_shuffle_epi8(first_data, _mm_set_epi8(7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8)))),
|
||||
_mm_andnot_si128(
|
||||
_mm_shuffle_epi8(first_nm_mask, _mm_set_epi8(5,4,3,2,1,0,15,14,13,12,11,10,9,8,7,6)),
|
||||
_mm_cmpeq_epi16(second_data, _mm_shuffle_epi8(first_data, _mm_set_epi8(5,4,3,2,1,0,15,14,13,12,11,10,9,8,7,6))))),
|
||||
_mm_or_si128(
|
||||
_mm_andnot_si128(
|
||||
_mm_shuffle_epi8(first_nm_mask, _mm_set_epi8(3,2,1,0,15,14,13,12,11,10,9,8,7,6,5,4)),
|
||||
_mm_cmpeq_epi16(second_data, _mm_shuffle_epi8(first_data, _mm_set_epi8(3,2,1,0,15,14,13,12,11,10,9,8,7,6,5,4)))),
|
||||
_mm_andnot_si128(
|
||||
_mm_shuffle_epi8(first_nm_mask, _mm_set_epi8(1,0,15,14,13,12,11,10,9,8,7,6,5,4,3,2)),
|
||||
_mm_cmpeq_epi16(second_data, _mm_shuffle_epi8(first_data, _mm_set_epi8(1,0,15,14,13,12,11,10,9,8,7,6,5,4,3,2))))))
|
||||
),
|
||||
bitmask);
|
||||
}
|
||||
|
||||
if (i < first.size)
|
||||
{
|
||||
for (; i < first.size && !has_mask; ++i)
|
||||
{
|
||||
if (has_first_null_map && first_null_map[i])
|
||||
continue;
|
||||
__m128i v_i = _mm_set1_epi16(first.data[i]);
|
||||
bitmask = _mm_or_si128(bitmask, _mm_cmpeq_epi16(second_data, v_i));
|
||||
has_mask = _mm_test_all_ones(bitmask);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!has_mask && second.size > 6)
|
||||
return false;
|
||||
|
||||
return hasAllIntegralLoopRemainder(j, first, second, first_null_map, second_null_map);
|
||||
}
|
||||
|
||||
// Int8/UInt8 version is faster with SSE than with AVX2
|
||||
// SSE2 Int8, UInt8 specialization
|
||||
template<typename IntType>
|
||||
requires (std::is_same_v<IntType, Int8> || std::is_same_v<IntType, UInt8>)
|
||||
inline ALWAYS_INLINE bool sliceHasImplAnyAllImplInt8(
|
||||
const NumericArraySlice<IntType> & first,
|
||||
const NumericArraySlice<IntType> & second,
|
||||
const UInt8 * first_null_map,
|
||||
const UInt8 * second_null_map)
|
||||
{
|
||||
if (second.size == 0)
|
||||
return true;
|
||||
|
||||
if (!hasNull(first_null_map, first.size) && hasNull(second_null_map, second.size))
|
||||
return false;
|
||||
|
||||
const bool has_first_null_map = first_null_map != nullptr;
|
||||
const bool has_second_null_map = second_null_map != nullptr;
|
||||
|
||||
size_t j = 0;
|
||||
int has_mask = 1;
|
||||
static constexpr int8_t full = -1, none = 0;
|
||||
const __m128i zeros = _mm_setzero_si128();
|
||||
|
||||
if (second.size > 15 && first.size > 15)
|
||||
{
|
||||
for (; j < second.size - 15 && has_mask; j += 16)
|
||||
{
|
||||
has_mask = 0;
|
||||
const __m128i second_data = _mm_loadu_si128(reinterpret_cast<const __m128i *>(second.data + j));
|
||||
__m128i bitmask = has_second_null_map ?
|
||||
_mm_set_epi8(
|
||||
(second_null_map[j + 15]) ? full : none, (second_null_map[j + 14]) ? full : none,
|
||||
(second_null_map[j + 13]) ? full : none, (second_null_map[j + 12]) ? full : none,
|
||||
(second_null_map[j + 11]) ? full : none, (second_null_map[j + 10]) ? full : none,
|
||||
(second_null_map[j + 9]) ? full : none, (second_null_map[j + 8]) ? full : none,
|
||||
(second_null_map[j + 7]) ? full : none, (second_null_map[j + 6]) ? full : none,
|
||||
(second_null_map[j + 5]) ? full : none, (second_null_map[j + 4]) ? full : none,
|
||||
(second_null_map[j + 3]) ? full : none, (second_null_map[j + 2]) ? full : none,
|
||||
(second_null_map[j + 1]) ? full : none, (second_null_map[j]) ? full : none)
|
||||
: zeros;
|
||||
|
||||
size_t i = 0;
|
||||
for (; i < first.size - 15 && !has_mask; has_mask = _mm_test_all_ones(bitmask), i += 16)
|
||||
{
|
||||
const __m128i first_data = _mm_loadu_si128(reinterpret_cast<const __m128i *>(first.data + i));
|
||||
const __m128i first_nm_mask = has_first_null_map ?
|
||||
_mm_loadu_si128(reinterpret_cast<const __m128i *>(first_null_map + i))
|
||||
: zeros;
|
||||
bitmask =
|
||||
_mm_or_si128(
|
||||
_mm_or_si128(
|
||||
_mm_or_si128(
|
||||
_mm_or_si128(
|
||||
_mm_or_si128(
|
||||
_mm_andnot_si128(
|
||||
first_nm_mask,
|
||||
_mm_cmpeq_epi8(second_data, first_data)),
|
||||
_mm_andnot_si128(
|
||||
_mm_shuffle_epi8(first_nm_mask, _mm_set_epi8(14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,15)),
|
||||
_mm_cmpeq_epi8(second_data, _mm_shuffle_epi8(first_data, _mm_set_epi8(14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,15))))),
|
||||
_mm_or_si128(
|
||||
_mm_andnot_si128(
|
||||
_mm_shuffle_epi8(first_nm_mask, _mm_set_epi8(13,12,11,10,9,8,7,6,5,4,3,2,1,0,15,14)),
|
||||
_mm_cmpeq_epi8(second_data, _mm_shuffle_epi8(first_data, _mm_set_epi8(13,12,11,10,9,8,7,6,5,4,3,2,1,0,15,14)))),
|
||||
_mm_andnot_si128(
|
||||
_mm_shuffle_epi8(first_nm_mask, _mm_set_epi8(12,11,10,9,8,7,6,5,4,3,2,1,0,15,14,13)),
|
||||
_mm_cmpeq_epi8(second_data, _mm_shuffle_epi8(first_data, _mm_set_epi8(12,11,10,9,8,7,6,5,4,3,2,1,0,15,14,13)))))
|
||||
),
|
||||
_mm_or_si128(
|
||||
_mm_or_si128(
|
||||
_mm_andnot_si128(
|
||||
_mm_shuffle_epi8(first_nm_mask, _mm_set_epi8(11,10,9,8,7,6,5,4,3,2,1,0,15,14,13,12)),
|
||||
_mm_cmpeq_epi8(second_data, _mm_shuffle_epi8(first_data, _mm_set_epi8(11,10,9,8,7,6,5,4,3,2,1,0,15,14,13,12)))),
|
||||
_mm_andnot_si128(
|
||||
_mm_shuffle_epi8(first_nm_mask, _mm_set_epi8(10,9,8,7,6,5,4,3,2,1,0,15,14,13,12,11)),
|
||||
_mm_cmpeq_epi8(second_data, _mm_shuffle_epi8(first_data, _mm_set_epi8(10,9,8,7,6,5,4,3,2,1,0,15,14,13,12,11))))),
|
||||
_mm_or_si128(
|
||||
_mm_andnot_si128(
|
||||
_mm_shuffle_epi8(first_nm_mask, _mm_set_epi8(9,8,7,6,5,4,3,2,1,0,15,14,13,12,11,10)),
|
||||
_mm_cmpeq_epi8(second_data, _mm_shuffle_epi8(first_data, _mm_set_epi8(9,8,7,6,5,4,3,2,1,0,15,14,13,12,11,10)))),
|
||||
_mm_andnot_si128(
|
||||
_mm_shuffle_epi8(first_nm_mask, _mm_set_epi8(8,7,6,5,4,3,2,1,0,15,14,13,12,11,10,9)),
|
||||
_mm_cmpeq_epi8(second_data, _mm_shuffle_epi8(first_data, _mm_set_epi8(8,7,6,5,4,3,2,1,0,15,14,13,12,11,10,9))))))),
|
||||
_mm_or_si128(
|
||||
_mm_or_si128(
|
||||
_mm_or_si128(
|
||||
_mm_andnot_si128(
|
||||
_mm_shuffle_epi8(first_nm_mask, _mm_set_epi8(7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8)),
|
||||
_mm_cmpeq_epi8(second_data, _mm_shuffle_epi8(first_data, _mm_set_epi8(7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8)))),
|
||||
_mm_andnot_si128(
|
||||
_mm_shuffle_epi8(first_nm_mask, _mm_set_epi8(6,5,4,3,2,1,0,15,14,13,12,11,10,9,8,7)),
|
||||
_mm_cmpeq_epi8(second_data, _mm_shuffle_epi8(first_data, _mm_set_epi8(6,5,4,3,2,1,0,15,14,13,12,11,10,9,8,7))))),
|
||||
_mm_or_si128(
|
||||
_mm_andnot_si128(
|
||||
_mm_shuffle_epi8(first_nm_mask, _mm_set_epi8(5,4,3,2,1,0,15,14,13,12,11,10,9,8,7,6)),
|
||||
_mm_cmpeq_epi8(second_data, _mm_shuffle_epi8(first_data, _mm_set_epi8(5,4,3,2,1,0,15,14,13,12,11,10,9,8,7,6)))),
|
||||
_mm_andnot_si128(
|
||||
_mm_shuffle_epi8(first_nm_mask, _mm_set_epi8(4,3,2,1,0,15,14,13,12,11,10,9,8,7,6,5)),
|
||||
_mm_cmpeq_epi8(second_data, _mm_shuffle_epi8(first_data, _mm_set_epi8(4,3,2,1,0,15,14,13,12,11,10,9,8,7,6,5)))))),
|
||||
_mm_or_si128(
|
||||
_mm_or_si128(
|
||||
_mm_andnot_si128(
|
||||
_mm_shuffle_epi8(first_nm_mask, _mm_set_epi8(3,2,1,0,15,14,13,12,11,10,9,8,7,6,5,4)),
|
||||
_mm_cmpeq_epi8(second_data, _mm_shuffle_epi8(first_data, _mm_set_epi8(3,2,1,0,15,14,13,12,11,10,9,8,7,6,5,4)))),
|
||||
_mm_andnot_si128(
|
||||
_mm_shuffle_epi8(first_nm_mask, _mm_set_epi8(2,1,0,15,14,13,12,11,10,9,8,7,6,5,4,3)),
|
||||
_mm_cmpeq_epi8(second_data, _mm_shuffle_epi8(first_data, _mm_set_epi8(2,1,0,15,14,13,12,11,10,9,8,7,6,5,4,3))))),
|
||||
_mm_or_si128(
|
||||
_mm_andnot_si128(
|
||||
_mm_shuffle_epi8(first_nm_mask, _mm_set_epi8(1,0,15,14,13,12,11,10,9,8,7,6,5,4,3,2)),
|
||||
_mm_cmpeq_epi8(second_data, _mm_shuffle_epi8(first_data, _mm_set_epi8(1,0,15,14,13,12,11,10,9,8,7,6,5,4,3,2)))),
|
||||
_mm_andnot_si128(
|
||||
_mm_shuffle_epi8(first_nm_mask, _mm_set_epi8(0,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1)),
|
||||
_mm_cmpeq_epi8(second_data, _mm_shuffle_epi8(first_data, _mm_set_epi8(0,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1)))))))),
|
||||
bitmask);
|
||||
}
|
||||
|
||||
if (i < first.size)
|
||||
{
|
||||
for (; i < first.size && !has_mask; ++i)
|
||||
{
|
||||
if (has_first_null_map && first_null_map[i])
|
||||
continue;
|
||||
|
||||
__m128i v_i = _mm_set1_epi8(first.data[i]);
|
||||
bitmask = _mm_or_si128(bitmask, _mm_cmpeq_epi8(second_data, v_i));
|
||||
has_mask = _mm_test_all_ones(bitmask);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!has_mask && second.size > 15)
|
||||
return false;
|
||||
|
||||
return hasAllIntegralLoopRemainder(j, first, second, first_null_map, second_null_map);
|
||||
}
|
||||
|
||||
)
|
||||
|
||||
#endif
|
||||
|
||||
template <
|
||||
ArraySearchType search_type,
|
||||
typename FirstSliceType,
|
||||
typename SecondSliceType,
|
||||
bool (*isEqual)(const FirstSliceType &, const SecondSliceType &, size_t, size_t)>
|
||||
bool sliceHasImplAnyAllGenericImpl(const FirstSliceType & first, const SecondSliceType & second, const UInt8 * first_null_map, const UInt8 * second_null_map)
|
||||
{
|
||||
const bool has_first_null_map = first_null_map != nullptr;
|
||||
const bool has_second_null_map = second_null_map != nullptr;
|
||||
|
||||
const bool has_second_null = hasNull(second_null_map, second.size);
|
||||
if (has_second_null)
|
||||
{
|
||||
const bool has_first_null = hasNull(first_null_map, first.size);
|
||||
|
||||
if (has_first_null && search_type == ArraySearchType::Any)
|
||||
return true;
|
||||
|
||||
if (!has_first_null && search_type == ArraySearchType::All)
|
||||
return false;
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < second.size; ++i)
|
||||
{
|
||||
if (has_second_null_map && second_null_map[i])
|
||||
continue;
|
||||
|
||||
bool has = false;
|
||||
|
||||
for (size_t j = 0; j < first.size && !has; ++j)
|
||||
{
|
||||
if (has_first_null_map && first_null_map[j])
|
||||
continue;
|
||||
|
||||
if (isEqual(first, second, j, i))
|
||||
{
|
||||
has = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (has && search_type == ArraySearchType::Any)
|
||||
return true;
|
||||
|
||||
if (!has && search_type == ArraySearchType::All)
|
||||
return false;
|
||||
}
|
||||
|
||||
return search_type == ArraySearchType::All;
|
||||
}
|
||||
|
||||
/// Methods to check if first array has elements from second array, overloaded for various combinations of types.
|
||||
template <
|
||||
ArraySearchType search_type,
|
||||
typename FirstSliceType,
|
||||
typename SecondSliceType,
|
||||
bool (*isEqual)(const FirstSliceType &, const SecondSliceType &, size_t, size_t)>
|
||||
inline ALWAYS_INLINE bool sliceHasImplAnyAll(const FirstSliceType & first, const SecondSliceType & second, const UInt8 * first_null_map, const UInt8 * second_null_map)
|
||||
{
|
||||
#if USE_MULTITARGET_CODE
|
||||
if constexpr (search_type == ArraySearchType::All && std::is_same_v<FirstSliceType, SecondSliceType>)
|
||||
{
|
||||
|
||||
#if defined(__AVX2__)
|
||||
if (isArchSupported(TargetArch::AVX2))
|
||||
{
|
||||
if constexpr (std::is_same_v<FirstSliceType, NumericArraySlice<Int16>> || std::is_same_v<FirstSliceType, NumericArraySlice<UInt16>>)
|
||||
{
|
||||
return GatherUtils::TargetSpecific::AVX2::sliceHasImplAnyAllImplInt16(first, second, first_null_map, second_null_map);
|
||||
}
|
||||
else if constexpr (std::is_same_v<FirstSliceType, NumericArraySlice<Int32>> || std::is_same_v<FirstSliceType, NumericArraySlice<UInt32>>)
|
||||
{
|
||||
return GatherUtils::TargetSpecific::AVX2::sliceHasImplAnyAllImplInt32(first, second, first_null_map, second_null_map);
|
||||
}
|
||||
else if constexpr (std::is_same_v<FirstSliceType, NumericArraySlice<Int64>> || std::is_same_v<FirstSliceType, NumericArraySlice<UInt64>>)
|
||||
{
|
||||
return GatherUtils::TargetSpecific::AVX2::sliceHasImplAnyAllImplInt64(first, second, first_null_map, second_null_map);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
if (isArchSupported(TargetArch::SSE42))
|
||||
{
|
||||
if constexpr (std::is_same_v<FirstSliceType, NumericArraySlice<Int8>> || std::is_same_v<FirstSliceType, NumericArraySlice<UInt8>>)
|
||||
{
|
||||
return TargetSpecific::SSE42::sliceHasImplAnyAllImplInt8(first, second, first_null_map, second_null_map);
|
||||
}
|
||||
else if constexpr (std::is_same_v<FirstSliceType, NumericArraySlice<Int16>> || std::is_same_v<FirstSliceType, NumericArraySlice<UInt16>>)
|
||||
{
|
||||
return GatherUtils::TargetSpecific::SSE42::sliceHasImplAnyAllImplInt16(first, second, first_null_map, second_null_map);
|
||||
}
|
||||
else if constexpr (std::is_same_v<FirstSliceType, NumericArraySlice<Int32>> || std::is_same_v<FirstSliceType, NumericArraySlice<UInt32>>)
|
||||
{
|
||||
return GatherUtils::TargetSpecific::SSE42::sliceHasImplAnyAllImplInt32(first, second, first_null_map, second_null_map);
|
||||
}
|
||||
else if constexpr (std::is_same_v<FirstSliceType, NumericArraySlice<Int64>> || std::is_same_v<FirstSliceType, NumericArraySlice<UInt64>>)
|
||||
{
|
||||
return GatherUtils::TargetSpecific::SSE42::sliceHasImplAnyAllImplInt64(first, second, first_null_map, second_null_map);
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
return sliceHasImplAnyAllGenericImpl<search_type, FirstSliceType, SecondSliceType, isEqual>(first, second, first_null_map, second_null_map);
|
||||
}
|
||||
|
||||
|
||||
}
|
@ -1,8 +1,8 @@
|
||||
#pragma once
|
||||
|
||||
#include <Functions/TargetSpecific.h>
|
||||
#include <Functions/IFunction.h>
|
||||
|
||||
#include <Common/TargetSpecific.h>
|
||||
#include <Common/Stopwatch.h>
|
||||
#include <Interpreters/Context.h>
|
||||
|
||||
|
@ -6,8 +6,8 @@
|
||||
#include <Functions/IFunction.h>
|
||||
#include <Functions/FunctionHelpers.h>
|
||||
#include <Functions/FunctionFactory.h>
|
||||
#include <Functions/TargetSpecific.h>
|
||||
#include <Functions/PerformanceAdaptors.h>
|
||||
#include <Common/TargetSpecific.h>
|
||||
#include <base/range.h>
|
||||
#include <cmath>
|
||||
|
||||
|
139
src/Functions/tests/gtest_has_all.cpp
Normal file
139
src/Functions/tests/gtest_has_all.cpp
Normal file
@ -0,0 +1,139 @@
|
||||
#include <random>
|
||||
#include <gtest/gtest.h>
|
||||
#include <Functions/GatherUtils/Algorithms.h>
|
||||
|
||||
using namespace DB::GatherUtils;
|
||||
|
||||
|
||||
/// Returns a pair {distribution, engine}: a uniform integer distribution over [min, max]
/// and a Mersenne Twister engine seeded from std::random_device.
auto uni_int_dist(int min, int max)
{
    std::mt19937 engine(std::random_device{}());
    return std::make_pair(std::uniform_int_distribution<>(min, max), engine);
}
|
||||
|
||||
template<class T>
|
||||
void arrayInit(T* elements_to_have, size_t nb_elements_to_have, T* array_elements, size_t array_size, bool all_elements_present)
|
||||
{
|
||||
for (size_t i = 0; i < array_size; ++i)
|
||||
{
|
||||
array_elements[i] = i;
|
||||
}
|
||||
auto [dist, gen] = uni_int_dist(0, array_size - 1);
|
||||
for (size_t i = 0; i < nb_elements_to_have; ++i)
|
||||
{
|
||||
elements_to_have[i] = array_elements[dist(gen)];
|
||||
}
|
||||
if (!all_elements_present)
|
||||
{
|
||||
/// make one element to be searched for missing from the target array
|
||||
elements_to_have[nb_elements_to_have - 1] = array_size + 1;
|
||||
}
|
||||
}
|
||||
|
||||
/// Clears the null map and then marks up to nb_null_elements randomly chosen positions as null.
/// Positions are drawn from [0, null_map_size - 2], so the last element always stays non-null.
/// The same position may be drawn more than once, so fewer than nb_null_elements entries may end up set.
/// A map with fewer than two entries has no position other than the last one, so it is only cleared.
void nullMapInit(UInt8 * null_map, size_t null_map_size, size_t nb_null_elements)
{
    for (size_t i = 0; i < null_map_size; ++i)
    {
        null_map[i] = 0;
    }

    /// Avoids building a distribution over an empty range (and the size_t wrap-around of size - 2).
    if (null_map_size < 2 || nb_null_elements == 0)
        return;

    /// -2 to keep the last element of the array non-null
    auto [dist, gen] = uni_int_dist(0, static_cast<int>(null_map_size - 2));
    for (size_t i = 0; i < null_map_size - 1 && i < nb_null_elements; ++i)
    {
        null_map[dist(gen)] = 1;
    }
}
|
||||
|
||||
template<class T>
|
||||
bool testHasAll(size_t nb_elements_to_have, size_t array_size, bool with_null_maps, bool all_elements_present)
|
||||
{
|
||||
auto array_elements = std::make_unique<T[]>(array_size);
|
||||
auto elements_to_have = std::make_unique<T[]>(nb_elements_to_have);
|
||||
|
||||
std::unique_ptr<UInt8[]> first_nm = nullptr, second_nm = nullptr;
|
||||
if (with_null_maps)
|
||||
{
|
||||
first_nm = std::make_unique<UInt8[]>(array_size);
|
||||
second_nm = std::make_unique<UInt8[]>(nb_elements_to_have);
|
||||
/// add a null to elements to have, but not to the target array, making the answer negative
|
||||
nullMapInit(first_nm.get(), array_size, 0);
|
||||
nullMapInit(second_nm.get(), nb_elements_to_have, 1);
|
||||
}
|
||||
|
||||
arrayInit(elements_to_have.get(), nb_elements_to_have, array_elements.get(), array_size, all_elements_present);
|
||||
|
||||
NumericArraySlice<T> first = {array_elements.get(), array_size};
|
||||
NumericArraySlice<T> second = {elements_to_have.get(), nb_elements_to_have};
|
||||
|
||||
/// check whether all elements of the second array are also elements of the first array, overloaded for various combinations of types.
|
||||
return sliceHasImplAnyAll<ArraySearchType::All, NumericArraySlice<T>, NumericArraySlice<T>, sliceEqualElements<T,T> >(
|
||||
first, second, first_nm.get(), second_nm.get());
|
||||
}
|
||||
|
||||
/// hasAll on int arrays without null maps: a small and a large case,
/// each with all searched elements present and with one of them missing.
TEST(HasAll, integer)
{
    const bool small_all_present = testHasAll<int>(4, 100, false, true);
    const bool small_one_missing = testHasAll<int>(4, 100, false, false);
    const bool large_all_present = testHasAll<int>(100, 4096, false, true);
    const bool large_one_missing = testHasAll<int>(100, 4096, false, false);

    ASSERT_TRUE(small_all_present);
    ASSERT_FALSE(small_one_missing);
    ASSERT_TRUE(large_all_present);
    ASSERT_FALSE(large_one_missing);
}
|
||||
|
||||
|
||||
/// hasAll on int64_t arrays without null maps: a small and a large case,
/// each with all searched elements present and with one of them missing.
TEST(HasAll, int64)
{
    const bool small_all_present = testHasAll<int64_t>(2, 100, false, true);
    const bool small_one_missing = testHasAll<int64_t>(2, 100, false, false);
    const bool large_all_present = testHasAll<int64_t>(100, 4096, false, true);
    const bool large_one_missing = testHasAll<int64_t>(100, 4096, false, false);

    ASSERT_TRUE(small_all_present);
    ASSERT_FALSE(small_one_missing);
    ASSERT_TRUE(large_all_present);
    ASSERT_FALSE(large_one_missing);
}
|
||||
|
||||
/// hasAll on int16_t arrays without null maps: a small and a large case,
/// each with all searched elements present and with one of them missing.
TEST(HasAll, int16)
{
    const bool small_all_present = testHasAll<int16_t>(2, 100, false, true);
    const bool small_one_missing = testHasAll<int16_t>(2, 100, false, false);
    const bool large_all_present = testHasAll<int16_t>(100, 4096, false, true);
    const bool large_one_missing = testHasAll<int16_t>(100, 4096, false, false);

    ASSERT_TRUE(small_all_present);
    ASSERT_FALSE(small_one_missing);
    ASSERT_TRUE(large_all_present);
    ASSERT_FALSE(large_one_missing);
}
|
||||
|
||||
/// hasAll on int8_t arrays without null maps. The array sizes (100 and 125) are small enough
/// that both the generated values and the "missing" value array_size + 1 fit into int8_t.
TEST(HasAll, int8)
{
    const bool small_all_present = testHasAll<int8_t>(2, 100, false, true);
    const bool small_one_missing = testHasAll<int8_t>(2, 100, false, false);
    const bool large_all_present = testHasAll<int8_t>(50, 125, false, true);
    const bool large_one_missing = testHasAll<int8_t>(50, 125, false, false);

    ASSERT_TRUE(small_all_present);
    ASSERT_FALSE(small_one_missing);
    ASSERT_TRUE(large_all_present);
    ASSERT_FALSE(large_one_missing);
}
|
||||
|
||||
/// With null maps enabled, testHasAll puts a null among the searched elements but none into the
/// target array, so hasAll must be false for every integer width even though all values are present.
TEST(HasAllSingleNullElement, all)
{
    const bool int32_result = testHasAll<int>(4, 100, true, true);
    const bool int64_result = testHasAll<int64_t>(4, 100, true, true);
    const bool int16_result = testHasAll<int16_t>(4, 100, true, true);
    const bool int8_result = testHasAll<int8_t>(4, 100, true, true);

    ASSERT_FALSE(int32_result);
    ASSERT_FALSE(int64_result);
    ASSERT_FALSE(int16_result);
    ASSERT_FALSE(int8_result);
}
|
@ -240,7 +240,7 @@ void ReadBufferFromS3::setReadUntilPosition(size_t position)
|
||||
|
||||
SeekableReadBuffer::Range ReadBufferFromS3::getRemainingReadRange() const
|
||||
{
|
||||
return Range{.left = static_cast<size_t>(offset), .right = read_until_position ? std::optional{read_until_position - 1} : std::nullopt};
|
||||
return Range{ .left = static_cast<size_t>(offset), .right = read_until_position ? std::optional{read_until_position - 1} : std::nullopt };
|
||||
}
|
||||
|
||||
std::unique_ptr<ReadBuffer> ReadBufferFromS3::initialize()
|
||||
|
@ -33,8 +33,11 @@ private:
|
||||
String key;
|
||||
UInt64 max_single_read_retries;
|
||||
|
||||
off_t offset = 0;
|
||||
off_t read_until_position = 0;
|
||||
/// These variables are atomic because they can be used for `logging only`
|
||||
/// (where it is not important to get consistent result)
|
||||
/// from separate thread other than the one which uses the buffer for s3 reading.
|
||||
std::atomic<off_t> offset = 0;
|
||||
std::atomic<off_t> read_until_position = 0;
|
||||
|
||||
Aws::S3::Model::GetObjectResult read_result;
|
||||
std::unique_ptr<ReadBuffer> impl;
|
||||
|
@ -1527,6 +1527,21 @@ ActionsDAG::SplitResult ActionsDAG::splitActionsBeforeArrayJoin(const NameSet &
|
||||
return res;
|
||||
}
|
||||
|
||||
/// Splits the DAG at the nodes that produce the sorting columns.
/// Every name in sort_columns is looked up with tryFindInIndex; a name that is not found
/// raises a LOGICAL_ERROR exception carrying a dump of the DAG.
/// The second part of the split inherits this DAG's project_input flag.
ActionsDAG::SplitResult ActionsDAG::splitActionsBySortingDescription(const NameSet & sort_columns) const
{
    std::unordered_set<const Node *> split_nodes;
    for (const auto & sort_column : sort_columns)
    {
        const auto * node = tryFindInIndex(sort_column);
        if (!node)
        {
            throw Exception(
                ErrorCodes::LOGICAL_ERROR, "Sorting column {} wasn't found in the ActionsDAG's index. DAG:\n{}", sort_column, dumpDAG());
        }

        split_nodes.insert(node);
    }

    auto res = split(split_nodes);
    res.second->project_input = project_input;
    return res;
}
|
||||
|
||||
ActionsDAG::SplitResult ActionsDAG::splitActionsForFilter(const std::string & column_name) const
|
||||
{
|
||||
const auto * node = tryFindInIndex(column_name);
|
||||
|
@ -274,6 +274,10 @@ public:
|
||||
/// Index of initial actions must contain column_name.
|
||||
SplitResult splitActionsForFilter(const std::string & column_name) const;
|
||||
|
||||
/// Splits actions into two parts. The first part contains all the calculations required to calculate sort_columns.
|
||||
/// The second contains the rest.
|
||||
SplitResult splitActionsBySortingDescription(const NameSet & sort_columns) const;
|
||||
|
||||
/// Create actions which may calculate part of filter using only available_inputs.
|
||||
/// If nothing may be calculated, returns nullptr.
|
||||
/// Otherwise, return actions which inputs are from available_inputs.
|
||||
|
@ -136,7 +136,7 @@ std::string ExternalDictionariesLoader::resolveDictionaryNameFromDatabaseCatalog
|
||||
|
||||
if (qualified_name->database.empty())
|
||||
{
|
||||
/// Ether database name is not specified and we should use current one
|
||||
/// Either database name is not specified and we should use current one
|
||||
/// or it's an XML dictionary.
|
||||
bool is_xml_dictionary = has(name);
|
||||
if (is_xml_dictionary)
|
||||
|
@ -15,6 +15,7 @@ public:
|
||||
size_t fields_count = 0;
|
||||
|
||||
String id;
|
||||
bool all = false;
|
||||
|
||||
String getID(char) const override;
|
||||
ASTPtr clone() const override;
|
||||
|
@ -5,6 +5,7 @@
|
||||
#include <Parsers/ASTPartition.h>
|
||||
#include <Parsers/ASTLiteral.h>
|
||||
#include <Parsers/ASTFunction.h>
|
||||
#include <Parsers/ASTIdentifier.h>
|
||||
#include <Common/typeid_cast.h>
|
||||
|
||||
namespace DB
|
||||
@ -13,6 +14,7 @@ namespace DB
|
||||
bool ParserPartition::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
|
||||
{
|
||||
ParserKeyword s_id("ID");
|
||||
ParserKeyword s_all("ALL");
|
||||
ParserStringLiteral parser_string_literal;
|
||||
ParserExpression parser_expr;
|
||||
|
||||
@ -28,6 +30,14 @@ bool ParserPartition::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
|
||||
|
||||
partition->id = partition_id->as<ASTLiteral &>().value.get<String>();
|
||||
}
|
||||
else if (s_all.ignore(pos, expected))
|
||||
{
|
||||
ASTPtr value = makeASTFunction("tuple");
|
||||
partition->value = value;
|
||||
partition->children.push_back(value);
|
||||
partition->fields_count = 0;
|
||||
partition->all = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
ASTPtr value;
|
||||
|
@ -44,16 +44,20 @@ size_t tryMergeExpressions(QueryPlan::Node * parent_node, QueryPlan::Nodes &);
|
||||
/// May split FilterStep and push down only part of it.
|
||||
size_t tryPushDownFilter(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes);
|
||||
|
||||
/// Move ExpressionStep after SortingStep if possible.
|
||||
/// May split ExpressionStep and lift up only a part of it.
|
||||
size_t tryExecuteFunctionsAfterSorting(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes);
|
||||
|
||||
inline const auto & getOptimizations()
|
||||
{
|
||||
static const std::array<Optimization, 5> optimizations =
|
||||
{{
|
||||
static const std::array<Optimization, 6> optimizations = {{
|
||||
{tryLiftUpArrayJoin, "liftUpArrayJoin", &QueryPlanOptimizationSettings::optimize_plan},
|
||||
{tryPushDownLimit, "pushDownLimit", &QueryPlanOptimizationSettings::optimize_plan},
|
||||
{trySplitFilter, "splitFilter", &QueryPlanOptimizationSettings::optimize_plan},
|
||||
{tryMergeExpressions, "mergeExpressions", &QueryPlanOptimizationSettings::optimize_plan},
|
||||
{tryPushDownFilter, "pushDownFilter", &QueryPlanOptimizationSettings::filter_push_down},
|
||||
}};
|
||||
{tryExecuteFunctionsAfterSorting, "liftUpFunctions", &QueryPlanOptimizationSettings::optimize_plan},
|
||||
}};
|
||||
|
||||
return optimizations;
|
||||
}
|
||||
|
77
src/Processors/QueryPlan/Optimizations/liftUpFunctions.cpp
Normal file
77
src/Processors/QueryPlan/Optimizations/liftUpFunctions.cpp
Normal file
@ -0,0 +1,77 @@
|
||||
#include <Interpreters/ActionsDAG.h>
|
||||
#include <Processors/QueryPlan/ExpressionStep.h>
|
||||
#include <Processors/QueryPlan/Optimizations/Optimizations.h>
|
||||
#include <Processors/QueryPlan/SortingStep.h>
|
||||
#include <Common/Exception.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int LOGICAL_ERROR;
|
||||
}
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
/// Returns the output stream of the only child of `node`.
/// Throws LOGICAL_ERROR when `node` does not have exactly one child.
const DB::DataStream & getChildOutputStream(DB::QueryPlan::Node & node)
{
    const auto & children = node.children;

    /// Happy path first: exactly one child, hand back its step's output stream.
    if (children.size() == 1)
        return children.front()->step->getOutputStream();

    throw DB::Exception(
        DB::ErrorCodes::LOGICAL_ERROR,
        "Node \"{}\" is expected to have only one child.",
        node.step->getName());
}
|
||||
|
||||
}
|
||||
|
||||
namespace DB::QueryPlanOptimizations
|
||||
{
|
||||
|
||||
/// Tries to postpone calculations that are not needed for sorting until after the SortingStep.
/// Applies only when parent_node holds a SortingStep and its single child holds an ExpressionStep.
/// The expression is split by the sort column names: the part required for sorting stays below
/// Sorting (in a node newly added to `nodes`), the rest becomes a new ExpressionStep above Sorting.
/// Returns 0 if the plan was left unchanged and 3 after a successful rewrite
/// (presumably the number of plan levels affected - confirm against the optimizer's contract).
size_t tryExecuteFunctionsAfterSorting(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes)
|
||||
{
|
||||
if (parent_node->children.size() != 1)
|
||||
return 0;
|
||||
|
||||
QueryPlan::Node * child_node = parent_node->children.front();
|
||||
|
||||
// References to the step slots of both nodes: the slots are swapped and reassigned below.
auto & parent_step = parent_node->step;
|
||||
auto & child_step = child_node->step;
|
||||
auto * sorting_step = typeid_cast<SortingStep *>(parent_step.get());
|
||||
auto * expression_step = typeid_cast<ExpressionStep *>(child_step.get());
|
||||
|
||||
if (!sorting_step || !expression_step)
|
||||
return 0;
|
||||
|
||||
// Collect the names of the columns the sort is performed by and split the expression around them.
NameSet sort_columns;
|
||||
for (const auto & col : sorting_step->getSortDescription())
|
||||
sort_columns.insert(col.column_name);
|
||||
auto [needed_for_sorting, unneeded_for_sorting] = expression_step->getExpression()->splitActionsBySortingDescription(sort_columns);
|
||||
|
||||
// No calculations can be postponed.
|
||||
if (unneeded_for_sorting->trivial())
|
||||
return 0;
|
||||
|
||||
// Sorting (parent_node) -> Expression (child_node)
|
||||
// Insert a fresh node between child_node and its former children.
auto & node_with_needed = nodes.emplace_back();
|
||||
std::swap(node_with_needed.children, child_node->children);
|
||||
child_node->children = {&node_with_needed};
|
||||
|
||||
node_with_needed.step = std::make_unique<ExpressionStep>(getChildOutputStream(node_with_needed), std::move(needed_for_sorting));
|
||||
node_with_needed.step->setStepDescription(child_step->getStepDescription());
|
||||
// Sorting (parent_node) -> still the original Expression (child_node) -> NeededCalculations (node_with_needed)
|
||||
|
||||
// The nodes keep their places in the tree; only the steps they own are exchanged.
// The raw sorting_step pointer obtained above still refers to the same step object
// (assuming `step` is a std::unique_ptr, as the make_unique assignments suggest).
std::swap(parent_step, child_step);
|
||||
// still the original Expression (parent_node) -> Sorting (child_node) -> NeededCalculations (node_with_needed)
|
||||
|
||||
// Sorting now reads from node_with_needed; its output header is rebuilt from its new input header.
sorting_step->updateInputStream(getChildOutputStream(*child_node));
|
||||
auto input_header = sorting_step->getInputStreams().at(0).header;
|
||||
sorting_step->updateOutputStream(std::move(input_header));
|
||||
|
||||
// Take a copy of the description: the step it belongs to is replaced on the next line.
auto description = parent_step->getStepDescription();
|
||||
parent_step = std::make_unique<DB::ExpressionStep>(child_step->getOutputStream(), std::move(unneeded_for_sorting));
|
||||
parent_step->setStepDescription(description + " [lifted up part]");
|
||||
// UnneededCalculations (parent_node) -> Sorting (child_node) -> NeededCalculations (node_with_needed)
|
||||
|
||||
return 3;
|
||||
}
|
||||
}
|
@ -1,11 +1,12 @@
|
||||
#include <stdexcept>
|
||||
#include <IO/Operators.h>
|
||||
#include <Processors/Merges/MergingSortedTransform.h>
|
||||
#include <Processors/QueryPlan/SortingStep.h>
|
||||
#include <QueryPipeline/QueryPipelineBuilder.h>
|
||||
#include <Processors/Transforms/FinishSortingTransform.h>
|
||||
#include <Processors/Transforms/LimitsCheckingTransform.h>
|
||||
#include <Processors/Transforms/MergeSortingTransform.h>
|
||||
#include <Processors/Transforms/PartialSortingTransform.h>
|
||||
#include <Processors/Transforms/FinishSortingTransform.h>
|
||||
#include <Processors/Merges/MergingSortedTransform.h>
|
||||
#include <Processors/Transforms/LimitsCheckingTransform.h>
|
||||
#include <IO/Operators.h>
|
||||
#include <QueryPipeline/QueryPipelineBuilder.h>
|
||||
#include <Common/JSONBuilder.h>
|
||||
|
||||
namespace DB
|
||||
@ -88,6 +89,18 @@ SortingStep::SortingStep(
|
||||
output_stream->sort_mode = DataStream::SortMode::Stream;
|
||||
}
|
||||
|
||||
/// Makes `input_stream` the one and only input stream of this step,
/// discarding whatever input streams were registered before.
void SortingStep::updateInputStream(DataStream input_stream)
{
    input_streams.clear();
    input_streams.push_back(std::move(input_stream));
}
|
||||
|
||||
/// Recreates the output stream of this step from its first input stream and `result_header`,
/// then refreshes the distinct columns of the new output stream against its header.
void SortingStep::updateOutputStream(Block result_header)
{
    const auto & source_stream = input_streams.at(0);
    output_stream = createOutputStream(source_stream, std::move(result_header), getDataStreamTraits());

    auto & rebuilt_stream = *output_stream;
    updateDistinctColumns(rebuilt_stream.header, rebuilt_stream.distinct_columns);
}
|
||||
|
||||
void SortingStep::updateLimit(size_t limit_)
|
||||
{
|
||||
if (limit_ && (limit == 0 || limit_ < limit))
|
||||
|
@ -49,6 +49,11 @@ public:
|
||||
/// Add limit or change it to lower value.
|
||||
void updateLimit(size_t limit_);
|
||||
|
||||
/// Replaces the input streams of the step with the single stream given.
void updateInputStream(DataStream input_stream);
|
||||
/// Rebuilds the output stream from the first input stream and the given header.
void updateOutputStream(Block result_header);
|
||||
|
||||
/// Returns the sort description (`result_description`) by value.
SortDescription getSortDescription() const { return result_description; }
|
||||
|
||||
private:
|
||||
|
||||
enum class Type
|
||||
|
@ -3110,7 +3110,8 @@ void MergeTreeData::tryRemovePartImmediately(DataPartPtr && part)
|
||||
{
|
||||
auto lock = lockParts();
|
||||
|
||||
LOG_TRACE(log, "Trying to immediately remove part {}", part->getNameWithState());
|
||||
auto part_name_with_state = part->getNameWithState();
|
||||
LOG_TRACE(log, "Trying to immediately remove part {}", part_name_with_state);
|
||||
|
||||
if (part->getState() != DataPartState::Temporary)
|
||||
{
|
||||
@ -3121,7 +3122,16 @@ void MergeTreeData::tryRemovePartImmediately(DataPartPtr && part)
|
||||
part.reset();
|
||||
|
||||
if (!((*it)->getState() == DataPartState::Outdated && it->unique()))
|
||||
{
|
||||
if ((*it)->getState() != DataPartState::Outdated)
|
||||
LOG_WARNING(log, "Cannot immediately remove part {} because it's not in Outdated state "
|
||||
"usage counter {}", part_name_with_state, it->use_count());
|
||||
|
||||
if (!it->unique())
|
||||
LOG_WARNING(log, "Cannot immediately remove part {} because someone using it right now "
|
||||
"usage counter {}", part_name_with_state, it->use_count());
|
||||
return;
|
||||
}
|
||||
|
||||
modifyPartState(it, DataPartState::Deleting);
|
||||
|
||||
@ -3566,7 +3576,12 @@ void MergeTreeData::checkAlterPartitionIsPossible(
|
||||
void MergeTreeData::checkPartitionCanBeDropped(const ASTPtr & partition, ContextPtr local_context)
|
||||
{
|
||||
const String partition_id = getPartitionIDFromQuery(partition, local_context);
|
||||
auto parts_to_remove = getVisibleDataPartsVectorInPartition(local_context, partition_id);
|
||||
DataPartsVector parts_to_remove;
|
||||
const auto * partition_ast = partition->as<ASTPartition>();
|
||||
if (partition_ast && partition_ast->all)
|
||||
parts_to_remove = getVisibleDataPartsVector(local_context);
|
||||
else
|
||||
parts_to_remove = getVisibleDataPartsVectorInPartition(local_context, partition_id);
|
||||
|
||||
UInt64 partition_size = 0;
|
||||
|
||||
@ -4020,6 +4035,8 @@ String MergeTreeData::getPartitionIDFromQuery(const ASTPtr & ast, ContextPtr loc
|
||||
|
||||
auto metadata_snapshot = getInMemoryMetadataPtr();
|
||||
const Block & key_sample_block = metadata_snapshot->getPartitionKey().sample_block;
|
||||
if (partition_ast.all)
|
||||
return "ALL";
|
||||
size_t fields_count = key_sample_block.columns();
|
||||
if (partition_ast.fields_count != fields_count)
|
||||
throw Exception(ErrorCodes::INVALID_PARTITION_VALUE,
|
||||
|
@ -1474,7 +1474,11 @@ void StorageMergeTree::dropPartition(const ASTPtr & partition, bool detach, Cont
|
||||
/// This protects against "revival" of data for a removed partition after completion of merge.
|
||||
auto merge_blocker = stopMergesAndWait();
|
||||
String partition_id = getPartitionIDFromQuery(partition, local_context);
|
||||
parts_to_remove = getVisibleDataPartsVectorInPartition(local_context, partition_id);
|
||||
const auto * partition_ast = partition->as<ASTPartition>();
|
||||
if (partition_ast && partition_ast->all)
|
||||
parts_to_remove = getVisibleDataPartsVector(local_context);
|
||||
else
|
||||
parts_to_remove = getVisibleDataPartsVectorInPartition(local_context, partition_id);
|
||||
|
||||
/// TODO should we throw an exception if parts_to_remove is empty?
|
||||
removePartsFromWorkingSet(local_context->getCurrentTransaction().get(), parts_to_remove, true);
|
||||
|
@ -43,6 +43,7 @@
|
||||
#include <Parsers/ASTDropQuery.h>
|
||||
#include <Parsers/ASTFunction.h>
|
||||
#include <Parsers/ASTOptimizeQuery.h>
|
||||
#include <Parsers/ASTPartition.h>
|
||||
#include <Parsers/ASTLiteral.h>
|
||||
#include <Parsers/queryToString.h>
|
||||
#include <Parsers/ASTCheckQuery.h>
|
||||
@ -3322,7 +3323,7 @@ void StorageReplicatedMergeTree::removePartAndEnqueueFetch(const String & part_n
|
||||
if (!broken_part_info.contains(part->info))
|
||||
continue;
|
||||
|
||||
/// Broken part itself ether already moved to detached or does not exist.
|
||||
/// Broken part itself either already moved to detached or does not exist.
|
||||
assert(broken_part_info != part->info);
|
||||
part->makeCloneInDetached("covered-by-broken", getInMemoryMetadataPtr());
|
||||
}
|
||||
@ -4943,15 +4944,37 @@ void StorageReplicatedMergeTree::dropPartition(const ASTPtr & partition, bool de
|
||||
throw Exception("DROP PARTITION cannot be done on this replica because it is not a leader", ErrorCodes::NOT_A_LEADER);
|
||||
|
||||
zkutil::ZooKeeperPtr zookeeper = getZooKeeperAndAssertNotReadonly();
|
||||
LogEntry entry;
|
||||
|
||||
String partition_id = getPartitionIDFromQuery(partition, query_context);
|
||||
bool did_drop = dropAllPartsInPartition(*zookeeper, partition_id, entry, query_context, detach);
|
||||
|
||||
if (did_drop)
|
||||
const auto * partition_ast = partition->as<ASTPartition>();
|
||||
if (partition_ast && partition_ast->all)
|
||||
{
|
||||
waitForLogEntryToBeProcessedIfNecessary(entry, query_context);
|
||||
cleanLastPartNode(partition_id);
|
||||
Strings partitions = zookeeper->getChildren(fs::path(zookeeper_path) / "block_numbers");
|
||||
|
||||
std::vector<std::pair<String, std::unique_ptr<LogEntry>>> entries_with_partitionid_to_drop;
|
||||
entries_with_partitionid_to_drop.reserve(partitions.size());
|
||||
for (String & partition_id : partitions)
|
||||
{
|
||||
auto entry = std::make_unique<LogEntry>();
|
||||
if (dropAllPartsInPartition(*zookeeper, partition_id, *entry, query_context, detach))
|
||||
entries_with_partitionid_to_drop.emplace_back(partition_id, std::move(entry));
|
||||
}
|
||||
|
||||
for (const auto & entry : entries_with_partitionid_to_drop)
|
||||
{
|
||||
waitForLogEntryToBeProcessedIfNecessary(*entry.second, query_context);
|
||||
cleanLastPartNode(entry.first);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
LogEntry entry;
|
||||
String partition_id = getPartitionIDFromQuery(partition, query_context);
|
||||
bool did_drop = dropAllPartsInPartition(*zookeeper, partition_id, entry, query_context, detach);
|
||||
if (did_drop)
|
||||
{
|
||||
waitForLogEntryToBeProcessedIfNecessary(entry, query_context);
|
||||
cleanLastPartNode(partition_id);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -162,7 +162,7 @@ def check_pr_description(pr_info):
|
||||
|
||||
i = 0
|
||||
while i < len(lines):
|
||||
if re.match(r"(?i)^[>*_ ]*change\s*log\s*category", lines[i]):
|
||||
if re.match(r"(?i)^[#>*_ ]*change\s*log\s*category", lines[i]):
|
||||
i += 1
|
||||
if i >= len(lines):
|
||||
break
|
||||
@ -191,7 +191,7 @@ def check_pr_description(pr_info):
|
||||
return result_status[:140], category
|
||||
|
||||
elif re.match(
|
||||
r"(?i)^[>*_ ]*(short\s*description|change\s*log\s*entry)", lines[i]
|
||||
r"(?i)^[#>*_ ]*(short\s*description|change\s*log\s*entry)", lines[i]
|
||||
):
|
||||
i += 1
|
||||
# Can have one empty line between header and the entry itself.
|
||||
@ -262,9 +262,14 @@ if __name__ == "__main__":
|
||||
remove_labels(gh, pr_info, pr_labels_to_remove)
|
||||
|
||||
if description_report:
|
||||
print("::notice ::Cannot run, description does not match the template")
|
||||
print(
|
||||
"::error ::Cannot run, PR description does not match the template: "
|
||||
f"{description_report}"
|
||||
)
|
||||
logging.info(
|
||||
"PR body doesn't match the template: (start)\n%s\n(end)", pr_info.body
|
||||
"PR body doesn't match the template: (start)\n%s\n(end)\n" "Reason: %s",
|
||||
pr_info.body,
|
||||
description_report,
|
||||
)
|
||||
url = (
|
||||
f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/"
|
||||
|
@ -0,0 +1,4 @@
|
||||
<test>
|
||||
<query>SELECT sipHash64(number) FROM numbers(1e8) ORDER BY number LIMIT 5</query>
|
||||
<query>SELECT sipHash64(number) FROM numbers(1e8) ORDER BY number + 1 LIMIT 5</query>
|
||||
</test>
|
53
tests/performance/has_all.xml
Normal file
53
tests/performance/has_all.xml
Normal file
@ -0,0 +1,53 @@
|
||||
<test>
|
||||
<substitutions>
|
||||
<substitution>
|
||||
<name>array_type</name>
|
||||
<values>
|
||||
<value>Int8</value>
|
||||
<value>Int16</value>
|
||||
<value>Int32</value>
|
||||
<value>Int64</value>
|
||||
</values>
|
||||
</substitution>
|
||||
</substitutions>
|
||||
|
||||
<create_query>
|
||||
CREATE TABLE test_table_small_{array_type}
|
||||
(
|
||||
`set` Array({array_type}),
|
||||
`subset` Array ({array_type})
|
||||
)
|
||||
ENGINE = MergeTree ORDER BY set;
|
||||
</create_query>
|
||||
|
||||
<create_query>
|
||||
CREATE TABLE test_table_medium_{array_type}
|
||||
(
|
||||
`set` Array({array_type}),
|
||||
`subset` Array ({array_type})
|
||||
)
|
||||
ENGINE = MergeTree ORDER BY set;
|
||||
</create_query>
|
||||
|
||||
<create_query>
|
||||
CREATE TABLE test_table_large_{array_type}
|
||||
(
|
||||
`set` Array({array_type}),
|
||||
`subset` Array ({array_type})
|
||||
)
|
||||
ENGINE = MergeTree ORDER BY set;
|
||||
</create_query>
|
||||
|
||||
|
||||
<fill_query>INSERT INTO test_table_small_{array_type} SELECT groupArraySample(5000)(rand64()) AS set, groupArraySample(500)(rand64()) AS subset FROM numbers(10000000) GROUP BY number % 5000;</fill_query>
|
||||
<fill_query>INSERT INTO test_table_medium_{array_type} SELECT groupArraySample(50000)(rand64()) AS set, groupArraySample(5000)(rand64()) AS subset FROM numbers(25000000) GROUP BY number % 50000;</fill_query>
|
||||
<fill_query>INSERT INTO test_table_large_{array_type} SELECT groupArraySample(500000)(rand64()) AS set, groupArraySample(500000)(rand64()) AS subset FROM numbers(50000000) GROUP BY number % 500000;</fill_query>
|
||||
|
||||
<query>SELECT hasAll(set, subset) FROM test_table_small_{array_type} FORMAT Null</query>
|
||||
<query>SELECT hasAll(set, subset) FROM test_table_medium_{array_type} FORMAT Null</query>
|
||||
<query>SELECT hasAll(set, subset) FROM test_table_large_{array_type} FORMAT Null</query>
|
||||
|
||||
<drop_query>DROP TABLE IF EXISTS test_table_small_{array_type}</drop_query>
|
||||
<drop_query>DROP TABLE IF EXISTS test_table_medium_{array_type}</drop_query>
|
||||
<drop_query>DROP TABLE IF EXISTS test_table_large_{array_type}</drop_query>
|
||||
</test>
|
@ -10,3 +10,15 @@
|
||||
5 2
|
||||
6 3
|
||||
7 3
|
||||
4 2
|
||||
5 2
|
||||
1 1
|
||||
2 1
|
||||
3 1
|
||||
1 1
|
||||
2 1
|
||||
3 1
|
||||
1 1
|
||||
2 2
|
||||
1 1
|
||||
1 1
|
||||
|
@ -19,4 +19,53 @@ INSERT INTO alter_attach VALUES (6, 3), (7, 3);
|
||||
ALTER TABLE alter_attach ATTACH PARTITION 2;
|
||||
SELECT * FROM alter_attach ORDER BY x;
|
||||
|
||||
ALTER TABLE alter_attach DETACH PARTITION ALL;
|
||||
SELECT * FROM alter_attach ORDER BY x;
|
||||
|
||||
ALTER TABLE alter_attach ATTACH PARTITION 2;
|
||||
SELECT * FROM alter_attach ORDER BY x;
|
||||
|
||||
DROP TABLE IF EXISTS detach_all_no_partition;
|
||||
CREATE TABLE detach_all_no_partition (x UInt64, p UInt8) ENGINE = MergeTree ORDER BY tuple();
|
||||
INSERT INTO detach_all_no_partition VALUES (1, 1), (2, 1), (3, 1);
|
||||
SELECT * FROM detach_all_no_partition ORDER BY x;
|
||||
|
||||
ALTER TABLE detach_all_no_partition DETACH PARTITION ALL;
|
||||
SELECT * FROM detach_all_no_partition ORDER BY x;
|
||||
|
||||
ALTER TABLE detach_all_no_partition ATTACH PARTITION tuple();
|
||||
SELECT * FROM detach_all_no_partition ORDER BY x;
|
||||
|
||||
DROP TABLE alter_attach;
|
||||
DROP TABLE detach_all_no_partition;
|
||||
|
||||
DROP TABLE IF EXISTS replicated_table_detach_all1;
|
||||
DROP TABLE IF EXISTS replicated_table_detach_all2;
|
||||
|
||||
CREATE TABLE replicated_table_detach_all1 (
|
||||
id UInt64,
|
||||
Data String
|
||||
) ENGINE = ReplicatedMergeTree('/clickhouse/tables/test_00753_{database}/replicated_table_detach_all', '1') ORDER BY id PARTITION BY id;
|
||||
|
||||
CREATE TABLE replicated_table_detach_all2 (
|
||||
id UInt64,
|
||||
Data String
|
||||
) ENGINE = ReplicatedMergeTree('/clickhouse/tables/test_00753_{database}/replicated_table_detach_all', '2') ORDER BY id PARTITION BY id;
|
||||
|
||||
|
||||
INSERT INTO replicated_table_detach_all1 VALUES (1, '1'), (2, '2');
|
||||
select * from replicated_table_detach_all1 order by id;
|
||||
|
||||
ALTER TABLE replicated_table_detach_all1 DETACH PARTITION ALL;
|
||||
select * from replicated_table_detach_all1 order by id;
|
||||
SYSTEM SYNC REPLICA replicated_table_detach_all2;
|
||||
select * from replicated_table_detach_all2 order by id;
|
||||
|
||||
ALTER TABLE replicated_table_detach_all1 ATTACH PARTITION tuple(1);
|
||||
select * from replicated_table_detach_all1 order by id;
|
||||
SYSTEM SYNC REPLICA replicated_table_detach_all2;
|
||||
select * from replicated_table_detach_all2 order by id;
|
||||
|
||||
DROP TABLE replicated_table_detach_all1;
|
||||
DROP TABLE replicated_table_detach_all2;
|
||||
|
||||
|
@ -1,3 +1,4 @@
|
||||
1000
|
||||
0
|
||||
1000
|
||||
0
|
||||
|
@ -21,4 +21,8 @@ ALTER TABLE table_01 ATTACH PART '20191001_1_1_0';
|
||||
|
||||
SELECT COUNT() FROM table_01;
|
||||
|
||||
ALTER TABLE table_01 DETACH PARTITION ALL;
|
||||
|
||||
SELECT COUNT() FROM table_01;
|
||||
|
||||
DROP TABLE IF EXISTS table_01;
|
||||
|
@ -35,10 +35,11 @@ Expression (Projection)
|
||||
ReadFromMergeTree (default.test_table)
|
||||
Expression (Projection)
|
||||
Limit (preliminary LIMIT (without OFFSET))
|
||||
Sorting
|
||||
Expression (Before ORDER BY)
|
||||
SettingQuotaAndLimits (Set limits and quota after reading from storage)
|
||||
ReadFromMergeTree (default.test_table)
|
||||
Expression (Before ORDER BY [lifted up part])
|
||||
Sorting
|
||||
Expression (Before ORDER BY)
|
||||
SettingQuotaAndLimits (Set limits and quota after reading from storage)
|
||||
ReadFromMergeTree (default.test_table)
|
||||
optimize_aggregation_in_order
|
||||
Expression ((Projection + Before ORDER BY))
|
||||
Aggregating
|
||||
|
@ -925,10 +925,11 @@ Expression ((Projection + Before ORDER BY))
|
||||
Window (Window step for window \'ORDER BY o ASC, number ASC\')
|
||||
Sorting (Sorting for window \'ORDER BY o ASC, number ASC\')
|
||||
Window (Window step for window \'ORDER BY number ASC\')
|
||||
Sorting (Sorting for window \'ORDER BY number ASC\')
|
||||
Expression ((Before window functions + (Projection + Before ORDER BY)))
|
||||
SettingQuotaAndLimits (Set limits and quota after reading from storage)
|
||||
ReadFromStorage (SystemNumbers)
|
||||
Expression ((Before window functions + (Projection + Before ORDER BY)) [lifted up part])
|
||||
Sorting (Sorting for window \'ORDER BY number ASC\')
|
||||
Expression ((Before window functions + (Projection + Before ORDER BY)))
|
||||
SettingQuotaAndLimits (Set limits and quota after reading from storage)
|
||||
ReadFromStorage (SystemNumbers)
|
||||
-- A test case for the sort comparator found by fuzzer.
|
||||
SELECT
|
||||
max(number) OVER (ORDER BY number DESC NULLS FIRST),
|
||||
|
@ -10,8 +10,8 @@ set max_block_size=40960;
|
||||
-- MergeSortingTransform: Re-merging intermediate ORDER BY data (20 blocks with 819200 rows) to save memory consumption
|
||||
-- MergeSortingTransform: Memory usage is lowered from 186.25 MiB to 95.00 MiB
|
||||
-- MergeSortingTransform: Re-merging is not useful (memory usage was not lowered by remerge_sort_lowered_memory_bytes_ratio=2.0)
|
||||
select number k, repeat(toString(number), 11) v1, repeat(toString(number), 12) v2 from numbers(3e6) order by k limit 400e3 format Null; -- { serverError 241 }
|
||||
select number k, repeat(toString(number), 11) v1, repeat(toString(number), 12) v2 from numbers(3e6) order by k limit 400e3 settings remerge_sort_lowered_memory_bytes_ratio=2. format Null; -- { serverError 241 }
|
||||
select number k, repeat(toString(number), 11) v1, repeat(toString(number), 12) v2 from numbers(3e6) order by v1, v2 limit 400e3 format Null; -- { serverError 241 }
|
||||
select number k, repeat(toString(number), 11) v1, repeat(toString(number), 12) v2 from numbers(3e6) order by v1, v2 limit 400e3 settings remerge_sort_lowered_memory_bytes_ratio=2. format Null; -- { serverError 241 }
|
||||
|
||||
-- remerge_sort_lowered_memory_bytes_ratio 1.9 is good (need at least 1.91/0.98=1.94)
|
||||
-- MergeSortingTransform: Re-merging intermediate ORDER BY data (20 blocks with 819200 rows) to save memory consumption
|
||||
|
@ -142,3 +142,12 @@ Filter
|
||||
Filter
|
||||
2 3
|
||||
2 3
|
||||
> function calculation should be done after sorting and limit (if possible)
|
||||
> Expression should be divided into two subexpressions and only one of them should be moved after Sorting
|
||||
Expression (Before ORDER BY [lifted up part])
|
||||
FUNCTION sipHash64
|
||||
Sorting
|
||||
Expression (Before ORDER BY)
|
||||
FUNCTION plus
|
||||
> this query should be executed without throwing an exception
|
||||
0
|
||||
|
@ -196,3 +196,12 @@ $CLICKHOUSE_CLIENT -q "
|
||||
select a, b from (
|
||||
select number + 1 as a, number + 2 as b from numbers(2) union all select number + 1 as b, number + 2 as a from numbers(2)
|
||||
) where a != 1 settings enable_optimize_predicate_expression = 0"
|
||||
|
||||
echo "> function calculation should be done after sorting and limit (if possible)"
|
||||
echo "> Expression should be divided into two subexpressions and only one of them should be moved after Sorting"
|
||||
$CLICKHOUSE_CLIENT -q "
|
||||
explain actions = 1 select number as n, sipHash64(n) from numbers(100) order by number + 1 limit 5" |
|
||||
sed 's/^ *//g' | grep -o "^ *\(Expression (Before ORDER BY.*)\|Sorting\|FUNCTION \w\+\)"
|
||||
echo "> this query should be executed without throwing an exception"
|
||||
$CLICKHOUSE_CLIENT -q "
|
||||
select throwIf(number = 5) from (select * from numbers(10)) order by number limit 1"
|
||||
|
@ -7,13 +7,15 @@
|
||||
ExpressionTransform
|
||||
(Limit)
|
||||
Limit
|
||||
(Sorting)
|
||||
MergingSortedTransform 2 → 1
|
||||
(Expression)
|
||||
ExpressionTransform × 2
|
||||
(SettingQuotaAndLimits)
|
||||
(ReadFromMergeTree)
|
||||
MergeTreeInOrder × 2 0 → 1
|
||||
(Expression)
|
||||
ExpressionTransform
|
||||
(Sorting)
|
||||
MergingSortedTransform 2 → 1
|
||||
(Expression)
|
||||
ExpressionTransform × 2
|
||||
(SettingQuotaAndLimits)
|
||||
(ReadFromMergeTree)
|
||||
MergeTreeInOrder × 2 0 → 1
|
||||
2020-10-01 9
|
||||
2020-10-01 9
|
||||
2020-10-01 9
|
||||
@ -23,16 +25,18 @@ ExpressionTransform
|
||||
ExpressionTransform
|
||||
(Limit)
|
||||
Limit
|
||||
(Sorting)
|
||||
MergingSortedTransform 2 → 1
|
||||
(Expression)
|
||||
ExpressionTransform × 2
|
||||
(SettingQuotaAndLimits)
|
||||
(ReadFromMergeTree)
|
||||
ReverseTransform
|
||||
MergeTreeReverse 0 → 1
|
||||
ReverseTransform
|
||||
MergeTreeReverse 0 → 1
|
||||
(Expression)
|
||||
ExpressionTransform
|
||||
(Sorting)
|
||||
MergingSortedTransform 2 → 1
|
||||
(Expression)
|
||||
ExpressionTransform × 2
|
||||
(SettingQuotaAndLimits)
|
||||
(ReadFromMergeTree)
|
||||
ReverseTransform
|
||||
MergeTreeReverse 0 → 1
|
||||
ReverseTransform
|
||||
MergeTreeReverse 0 → 1
|
||||
2020-10-01 9
|
||||
2020-10-01 9
|
||||
2020-10-01 9
|
||||
@ -42,15 +46,17 @@ ExpressionTransform
|
||||
ExpressionTransform
|
||||
(Limit)
|
||||
Limit
|
||||
(Sorting)
|
||||
FinishSortingTransform
|
||||
PartialSortingTransform
|
||||
MergingSortedTransform 2 → 1
|
||||
(Expression)
|
||||
ExpressionTransform × 2
|
||||
(SettingQuotaAndLimits)
|
||||
(ReadFromMergeTree)
|
||||
MergeTreeInOrder × 2 0 → 1
|
||||
(Expression)
|
||||
ExpressionTransform
|
||||
(Sorting)
|
||||
FinishSortingTransform
|
||||
PartialSortingTransform
|
||||
MergingSortedTransform 2 → 1
|
||||
(Expression)
|
||||
ExpressionTransform × 2
|
||||
(SettingQuotaAndLimits)
|
||||
(ReadFromMergeTree)
|
||||
MergeTreeInOrder × 2 0 → 1
|
||||
2020-10-11 0
|
||||
2020-10-11 0
|
||||
2020-10-11 0
|
||||
|
Loading…
Reference in New Issue
Block a user