Merge branch 'master' into mvcc_prototype

This commit is contained in:
Alexander Tokmakov 2022-04-05 14:38:02 +02:00
commit 1fe50ad201
46 changed files with 1568 additions and 173 deletions

View File

@ -1,4 +1,4 @@
Changelog category (leave one): ### Changelog category (leave one):
- New Feature - New Feature
- Improvement - Improvement
- Bug Fix (user-visible misbehaviour in official stable or prestable release) - Bug Fix (user-visible misbehaviour in official stable or prestable release)
@ -9,7 +9,7 @@ Changelog category (leave one):
- Not for changelog (changelog entry is not required) - Not for changelog (changelog entry is not required)
Changelog entry (a user-readable short description of the changes that goes to CHANGELOG.md): ### Changelog entry (a user-readable short description of the changes that goes to CHANGELOG.md):
... ...

2
contrib/unixodbc vendored

@ -1 +1 @@
Subproject commit b0ad30f7f6289c12b76f04bfb9d466374bb32168 Subproject commit a2cd5395e8c7f7390025ec93af5bfebef3fb5fcd

View File

@ -20,6 +20,8 @@ ENV LANG=en_US.UTF-8 \
COPY --from=glibc-donor /lib/linux-gnu/libc.so.6 /lib/linux-gnu/libdl.so.2 /lib/linux-gnu/libm.so.6 /lib/linux-gnu/libpthread.so.0 /lib/linux-gnu/librt.so.1 /lib/linux-gnu/libnss_dns.so.2 /lib/linux-gnu/libnss_files.so.2 /lib/linux-gnu/libresolv.so.2 /lib/linux-gnu/ld-2.31.so /lib/ COPY --from=glibc-donor /lib/linux-gnu/libc.so.6 /lib/linux-gnu/libdl.so.2 /lib/linux-gnu/libm.so.6 /lib/linux-gnu/libpthread.so.0 /lib/linux-gnu/librt.so.1 /lib/linux-gnu/libnss_dns.so.2 /lib/linux-gnu/libnss_files.so.2 /lib/linux-gnu/libresolv.so.2 /lib/linux-gnu/ld-2.31.so /lib/
COPY --from=glibc-donor /etc/nsswitch.conf /etc/ COPY --from=glibc-donor /etc/nsswitch.conf /etc/
COPY entrypoint.sh /entrypoint.sh COPY entrypoint.sh /entrypoint.sh
ARG TARGETARCH
RUN arch=${TARGETARCH:-amd64} \ RUN arch=${TARGETARCH:-amd64} \
&& case $arch in \ && case $arch in \
amd64) mkdir -p /lib64 && ln -sf /lib/ld-2.31.so /lib64/ld-linux-x86-64.so.2 ;; \ amd64) mkdir -p /lib64 && ln -sf /lib/ld-2.31.so /lib64/ld-linux-x86-64.so.2 ;; \

View File

@ -810,7 +810,7 @@ void Client::addOptions(OptionsDescription & options_description)
("quota_key", po::value<std::string>(), "A string to differentiate quotas when the user have keyed quotas configured on server") ("quota_key", po::value<std::string>(), "A string to differentiate quotas when the user have keyed quotas configured on server")
("max_client_network_bandwidth", po::value<int>(), "the maximum speed of data exchange over the network for the client in bytes per second.") ("max_client_network_bandwidth", po::value<int>(), "the maximum speed of data exchange over the network for the client in bytes per second.")
("compression", po::value<bool>(), "enable or disable compression") ("compression", po::value<bool>(), "enable or disable compression (enabled by default for remote communication and disabled for localhost communication).")
("query-fuzzer-runs", po::value<int>()->default_value(0), "After executing every SELECT query, do random mutations in it and run again specified number of times. This is used for testing to discover unexpected corner cases.") ("query-fuzzer-runs", po::value<int>()->default_value(0), "After executing every SELECT query, do random mutations in it and run again specified number of times. This is used for testing to discover unexpected corner cases.")
("interleave-queries-file", po::value<std::vector<std::string>>()->multitoken(), ("interleave-queries-file", po::value<std::vector<std::string>>()->multitoken(),

View File

@ -49,6 +49,18 @@ if (COMPILER_GCC)
add_definitions ("-fno-tree-loop-distribute-patterns") add_definitions ("-fno-tree-loop-distribute-patterns")
endif () endif ()
# ClickHouse developers may guard platform-dependent code with a macro (e.g. `#if ENABLE_MULTITARGET_CODE`).
# This option always defines that macro: to 1 when turned ON and to 0 when turned OFF.
# See `src/Common/TargetSpecific.h`
option(ENABLE_MULTITARGET_CODE "Enable platform-dependent code" ON)
if (ENABLE_MULTITARGET_CODE)
add_definitions(-DENABLE_MULTITARGET_CODE=1)
else()
add_definitions(-DENABLE_MULTITARGET_CODE=0)
endif()
add_subdirectory (Access) add_subdirectory (Access)
add_subdirectory (Backups) add_subdirectory (Backups)
add_subdirectory (Columns) add_subdirectory (Columns)

View File

@ -1,4 +1,4 @@
#include <Functions/TargetSpecific.h> #include <Common/TargetSpecific.h>
#include <Common/CpuId.h> #include <Common/CpuId.h>

View File

@ -334,15 +334,17 @@ SeekableReadBufferPtr CachedReadBufferFromRemoteFS::getImplementationBuffer(File
read_buffer_for_file_segment->seek(file_offset_of_buffer_end, SEEK_SET); read_buffer_for_file_segment->seek(file_offset_of_buffer_end, SEEK_SET);
} }
auto impl_range = read_buffer_for_file_segment->getRemainingReadRange();
auto download_offset = file_segment->getDownloadOffset(); auto download_offset = file_segment->getDownloadOffset();
if (download_offset != static_cast<size_t>(read_buffer_for_file_segment->getPosition())) if (download_offset != static_cast<size_t>(read_buffer_for_file_segment->getPosition()))
{
auto impl_range = read_buffer_for_file_segment->getRemainingReadRange();
throw Exception( throw Exception(
ErrorCodes::LOGICAL_ERROR, ErrorCodes::LOGICAL_ERROR,
"Buffer's offsets mismatch; cached buffer offset: {}, download_offset: {}, position: {}, implementation buffer offset: {}, " "Buffer's offsets mismatch; cached buffer offset: {}, download_offset: {}, position: {}, implementation buffer offset: {}, "
"implementation buffer reading until: {}, file segment info: {}", "implementation buffer reading until: {}, file segment info: {}",
file_offset_of_buffer_end, download_offset, read_buffer_for_file_segment->getPosition(), file_offset_of_buffer_end, download_offset, read_buffer_for_file_segment->getPosition(),
impl_range.left, *impl_range.right, file_segment->getInfoForLog()); impl_range.left, *impl_range.right, file_segment->getInfoForLog());
}
break; break;
} }
@ -802,12 +804,14 @@ std::optional<size_t> CachedReadBufferFromRemoteFS::getLastNonDownloadedOffset()
String CachedReadBufferFromRemoteFS::getInfoForLog() String CachedReadBufferFromRemoteFS::getInfoForLog()
{ {
auto implementation_buffer_read_range_str = String implementation_buffer_read_range_str;
implementation_buffer ? if (implementation_buffer)
std::to_string(implementation_buffer->getRemainingReadRange().left) {
+ '-' auto read_range = implementation_buffer->getRemainingReadRange();
+ (implementation_buffer->getRemainingReadRange().right ? std::to_string(*implementation_buffer->getRemainingReadRange().right) : "None") implementation_buffer_read_range_str = std::to_string(read_range.left) + '-' + (read_range.right ? std::to_string(*read_range.right) : "None");
: "None"; }
else
implementation_buffer_read_range_str = "None";
auto current_file_segment_info = current_file_segment_it == file_segments_holder->file_segments.end() ? "None" : (*current_file_segment_it)->getInfoForLog(); auto current_file_segment_info = current_file_segment_it == file_segments_holder->file_segments.end() ? "None" : (*current_file_segment_it)->getInfoForLog();

View File

@ -96,17 +96,6 @@ if (TARGET ch_contrib::rapidjson)
target_link_libraries(clickhouse_functions PRIVATE ch_contrib::rapidjson) target_link_libraries(clickhouse_functions PRIVATE ch_contrib::rapidjson)
endif() endif()
# ClickHouse developers may use platform-dependent code under some macro (e.g. `#ifdef ENABLE_MULTITARGET`).
# If turned ON, this option defines such macro.
# See `src/Functions/TargetSpecific.h`
option(ENABLE_MULTITARGET_CODE "Enable platform-dependent code" ON)
if (ENABLE_MULTITARGET_CODE)
add_definitions(-DENABLE_MULTITARGET_CODE=1)
else()
add_definitions(-DENABLE_MULTITARGET_CODE=0)
endif()
add_subdirectory(GatherUtils) add_subdirectory(GatherUtils)
target_link_libraries(clickhouse_functions PRIVATE clickhouse_functions_gatherutils) target_link_libraries(clickhouse_functions PRIVATE clickhouse_functions_gatherutils)

View File

@ -1,12 +1,12 @@
#pragma once #pragma once
#include <base/map.h> #include <base/map.h>
#include <Common/TargetSpecific.h>
#include <Functions/FunctionHelpers.h> #include <Functions/FunctionHelpers.h>
#include <Functions/GatherUtils/GatherUtils.h> #include <Functions/GatherUtils/GatherUtils.h>
#include <Functions/GatherUtils/Sources.h> #include <Functions/GatherUtils/Sources.h>
#include <Functions/IFunction.h> #include <Functions/IFunction.h>
#include <Functions/PerformanceAdaptors.h> #include <Functions/PerformanceAdaptors.h>
#include <Functions/TargetSpecific.h>
#include <DataTypes/DataTypeString.h> #include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypesNumber.h> #include <DataTypes/DataTypesNumber.h>
#include <DataTypes/getLeastSupertype.h> #include <DataTypes/getLeastSupertype.h>

View File

@ -38,8 +38,8 @@
#include <Columns/ColumnTuple.h> #include <Columns/ColumnTuple.h>
#include <Functions/IFunction.h> #include <Functions/IFunction.h>
#include <Functions/FunctionHelpers.h> #include <Functions/FunctionHelpers.h>
#include <Functions/TargetSpecific.h>
#include <Functions/PerformanceAdaptors.h> #include <Functions/PerformanceAdaptors.h>
#include <Common/TargetSpecific.h>
#include <base/range.h> #include <base/range.h>
#include <base/bit_cast.h> #include <base/bit_cast.h>

View File

@ -1,9 +1,9 @@
#pragma once #pragma once
#include <Common/TargetSpecific.h>
#include <DataTypes/DataTypesNumber.h> #include <DataTypes/DataTypesNumber.h>
#include <Columns/ColumnVector.h> #include <Columns/ColumnVector.h>
#include <Functions/IFunction.h> #include <Functions/IFunction.h>
#include <Functions/TargetSpecific.h>
#include <Functions/PerformanceAdaptors.h> #include <Functions/PerformanceAdaptors.h>
#include <IO/WriteHelpers.h> #include <IO/WriteHelpers.h>

View File

@ -7,6 +7,8 @@
#include <Core/AccurateComparison.h> #include <Core/AccurateComparison.h>
#include <base/range.h> #include <base/range.h>
#include "GatherUtils.h" #include "GatherUtils.h"
#include "sliceEqualElements.h"
#include "sliceHasImplAnyAll.h"
namespace DB::ErrorCodes namespace DB::ErrorCodes
@ -461,39 +463,19 @@ void NO_INLINE conditional(SourceA && src_a, SourceB && src_b, Sink && sink, con
} }
/// Methods to check if first array has elements from second array, overloaded for various combinations of types. template <typename T>
template < bool insliceEqualElements(const NumericArraySlice<T> & first [[maybe_unused]],
ArraySearchType search_type, size_t first_ind [[maybe_unused]],
typename FirstSliceType, size_t second_ind [[maybe_unused]])
typename SecondSliceType,
bool (*isEqual)(const FirstSliceType &, const SecondSliceType &, size_t, size_t)>
bool sliceHasImplAnyAll(const FirstSliceType & first, const SecondSliceType & second, const UInt8 * first_null_map, const UInt8 * second_null_map)
{ {
const bool has_first_null_map = first_null_map != nullptr; if constexpr (is_decimal<T>)
const bool has_second_null_map = second_null_map != nullptr; return accurate::equalsOp(first.data[first_ind].value, first.data[second_ind].value);
else
for (size_t i = 0; i < second.size; ++i) return accurate::equalsOp(first.data[first_ind], first.data[second_ind]);
{ }
bool has = false; inline ALWAYS_INLINE bool insliceEqualElements(const GenericArraySlice & first, size_t first_ind, size_t second_ind)
for (size_t j = 0; j < first.size && !has; ++j) {
{ return first.elements->compareAt(first_ind + first.begin, second_ind + first.begin, *first.elements, -1) == 0;
const bool is_first_null = has_first_null_map && first_null_map[j];
const bool is_second_null = has_second_null_map && second_null_map[i];
if (is_first_null && is_second_null)
has = true;
if (!is_first_null && !is_second_null && isEqual(first, second, j, i))
has = true;
}
if (has && search_type == ArraySearchType::Any)
return true;
if (!has && search_type == ArraySearchType::All)
return false;
}
return search_type == ArraySearchType::All;
} }
template < template <
@ -620,55 +602,6 @@ bool sliceHasImpl(const FirstSliceType & first, const SecondSliceType & second,
return sliceHasImplAnyAll<search_type, FirstSliceType, SecondSliceType, isEqual>(first, second, first_null_map, second_null_map); return sliceHasImplAnyAll<search_type, FirstSliceType, SecondSliceType, isEqual>(first, second, first_null_map, second_null_map);
} }
template <typename T, typename U>
bool sliceEqualElements(const NumericArraySlice<T> & first [[maybe_unused]],
const NumericArraySlice<U> & second [[maybe_unused]],
size_t first_ind [[maybe_unused]],
size_t second_ind [[maybe_unused]])
{
/// TODO: Decimal scale
if constexpr (is_decimal<T> && is_decimal<U>)
return accurate::equalsOp(first.data[first_ind].value, second.data[second_ind].value);
else if constexpr (is_decimal<T> || is_decimal<U>)
return false;
else
return accurate::equalsOp(first.data[first_ind], second.data[second_ind]);
}
template <typename T>
bool sliceEqualElements(const NumericArraySlice<T> &, const GenericArraySlice &, size_t, size_t)
{
return false;
}
template <typename U>
bool sliceEqualElements(const GenericArraySlice &, const NumericArraySlice<U> &, size_t, size_t)
{
return false;
}
inline ALWAYS_INLINE bool sliceEqualElements(const GenericArraySlice & first, const GenericArraySlice & second, size_t first_ind, size_t second_ind)
{
return first.elements->compareAt(first_ind + first.begin, second_ind + second.begin, *second.elements, -1) == 0;
}
template <typename T>
bool insliceEqualElements(const NumericArraySlice<T> & first [[maybe_unused]],
size_t first_ind [[maybe_unused]],
size_t second_ind [[maybe_unused]])
{
if constexpr (is_decimal<T>)
return accurate::equalsOp(first.data[first_ind].value, first.data[second_ind].value);
else
return accurate::equalsOp(first.data[first_ind], first.data[second_ind]);
}
inline ALWAYS_INLINE bool insliceEqualElements(const GenericArraySlice & first, size_t first_ind, size_t second_ind)
{
return first.elements->compareAt(first_ind + first.begin, second_ind + first.begin, *first.elements, -1) == 0;
}
template <ArraySearchType search_type, typename T, typename U> template <ArraySearchType search_type, typename T, typename U>
bool sliceHas(const NumericArraySlice<T> & first, const NumericArraySlice<U> & second) bool sliceHas(const NumericArraySlice<T> & first, const NumericArraySlice<U> & second)
{ {
@ -854,4 +787,3 @@ void resizeConstantSize(ArraySource && array_source, ValueSource && value_source
} }
} }

View File

@ -1,4 +1,5 @@
include("${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake") include("${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake")
add_headers_and_sources(clickhouse_functions_gatherutils .) add_headers_and_sources(clickhouse_functions_gatherutils .)
add_library(clickhouse_functions_gatherutils ${clickhouse_functions_gatherutils_sources} ${clickhouse_functions_gatherutils_headers}) add_library(clickhouse_functions_gatherutils ${clickhouse_functions_gatherutils_sources} ${clickhouse_functions_gatherutils_headers})
target_link_libraries(clickhouse_functions_gatherutils PRIVATE dbms) target_link_libraries(clickhouse_functions_gatherutils PRIVATE dbms)
@ -14,3 +15,5 @@ endif()
if (STRIP_DEBUG_SYMBOLS_FUNCTIONS) if (STRIP_DEBUG_SYMBOLS_FUNCTIONS)
target_compile_options(clickhouse_functions_gatherutils PRIVATE "-g0") target_compile_options(clickhouse_functions_gatherutils PRIVATE "-g0")
endif() endif()
set_target_properties(clickhouse_functions_gatherutils PROPERTIES COMPILE_FLAGS "${X86_INTRINSICS_FLAGS}")

View File

@ -0,0 +1,41 @@
#pragma once
#include <Core/AccurateComparison.h>
#include "Slices.h"
namespace DB::GatherUtils
{
template <typename T, typename U>
bool sliceEqualElements(const NumericArraySlice<T> & first [[maybe_unused]],
const NumericArraySlice<U> & second [[maybe_unused]],
size_t first_ind [[maybe_unused]],
size_t second_ind [[maybe_unused]])
{
/// TODO: Decimal scale
if constexpr (is_decimal<T> && is_decimal<U>)
return accurate::equalsOp(first.data[first_ind].value, second.data[second_ind].value);
else if constexpr (is_decimal<T> || is_decimal<U>)
return false;
else
return accurate::equalsOp(first.data[first_ind], second.data[second_ind]);
}
template <typename T>
bool sliceEqualElements(const NumericArraySlice<T> &, const GenericArraySlice &, size_t, size_t)
{
return false;
}
template <typename U>
bool sliceEqualElements(const GenericArraySlice &, const NumericArraySlice<U> &, size_t, size_t)
{
return false;
}
inline ALWAYS_INLINE bool sliceEqualElements(const GenericArraySlice & first, const GenericArraySlice & second, size_t first_ind, size_t second_ind)
{
return first.elements->compareAt(first_ind + first.begin, second_ind + second.begin, *second.elements, -1) == 0;
}
}

View File

@ -0,0 +1,943 @@
#pragma once
#include "GatherUtils.h"
#include "Slices.h"
#include "sliceEqualElements.h"
#if defined(__SSE4_2__)
#include <emmintrin.h>
#include <smmintrin.h>
#include <nmmintrin.h>
#endif
#if defined(__AVX2__)
#include <immintrin.h>
#endif
#include <Common/TargetSpecific.h>
namespace DB::GatherUtils
{
/// Returns true if any of the first `null_map_size` bytes of `null_map` is non-zero.
/// A nullptr `null_map` means "no null map" and yields false.
inline ALWAYS_INLINE bool hasNull(const UInt8 * null_map, size_t null_map_size)
{
    if (!null_map)
        return false;

    const UInt8 * const map_end = null_map + null_map_size;
    for (const UInt8 * flag = null_map; flag != map_end; ++flag)
        if (*flag)
            return true;

    return false;
}
/// Scalar part of the "has all" check: verifies that every element of `second` starting at index `j`
/// is present in `first`.
/// Null elements of `second` are skipped (the caller is expected to have verified beforehand that
/// `first` contains at least one null); null elements of `first` are never used as a match.
/// Either null map may be nullptr, meaning "no null map".
template<class T>
inline ALWAYS_INLINE bool hasAllIntegralLoopRemainder(
    size_t j, const NumericArraySlice<T> & first, const NumericArraySlice<T> & second, const UInt8 * first_null_map, const UInt8 * second_null_map)
{
    const bool check_first_nulls = first_null_map != nullptr;
    const bool check_second_nulls = second_null_map != nullptr;

    for (size_t second_pos = j; second_pos < second.size; ++second_pos)
    {
        /// A null in `second` is matched by a null in `first`, nothing to search for.
        const bool second_is_null = check_second_nulls && second_null_map[second_pos];
        if (second_is_null)
            continue;

        /// Linear search over the non-null elements of `first`; stops at the first match.
        bool matched = false;
        for (size_t first_pos = 0; first_pos < first.size && !matched; ++first_pos)
        {
            const bool first_is_null = check_first_nulls && first_null_map[first_pos];
            matched = !first_is_null && first.data[first_pos] == second.data[second_pos];
        }

        if (!matched)
            return false;
    }

    return true;
}
#if defined(__AVX2__)
DECLARE_AVX2_SPECIFIC_CODE (
// AVX2 Int64, UInt64 specialization.
// Returns true when every element of `second` can be matched in `first`:
//  - an empty `second` always matches;
//  - a null in `second` can only be matched if `first` contains at least one null
//    (otherwise the function returns false up front);
//  - a non-null element of `second` must be equal to some non-null element of `first`.
// `first_null_map` / `second_null_map` may be nullptr, meaning "no null map".
// `second` is processed four elements per iteration; whatever the vector loop does not cover
// is delegated to hasAllIntegralLoopRemainder().
template<typename IntType>
requires (std::is_same_v<IntType, Int64> || std::is_same_v<IntType, UInt64>)
inline ALWAYS_INLINE bool sliceHasImplAnyAllImplInt64(
const NumericArraySlice<IntType> & first,
const NumericArraySlice<IntType> & second,
const UInt8 * first_null_map,
const UInt8 * second_null_map)
{
if (second.size == 0)
return true;
// A null in `second` cannot be matched when `first` holds no null at all.
if (!hasNull(first_null_map, first.size) && hasNull(second_null_map, second.size))
return false;
const bool has_first_null_map = first_null_map != nullptr;
const bool has_second_null_map = second_null_map != nullptr;
// j: index of the next element of `second` to check.
// has_mask: non-zero while every block of `second` checked so far was fully matched.
size_t j = 0;
int has_mask = 1;
static constexpr Int64 full = -1, none = 0;
const __m256i ones = _mm256_set1_epi64x(full);
const __m256i zeros = _mm256_setzero_si256();
// The vector path needs at least one full 4-element block in both slices.
// NOTE(review): presumably `size` is unsigned, in which case this guard also keeps `size - 3` from wrapping around — confirm.
if (second.size > 3 && first.size > 3)
{
for (; j < second.size - 3 && has_mask; j += 4)
{
has_mask = 0;
const __m256i second_data = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(second.data + j));
// A lane of `bitmask` being all-ones means the corresponding element of `second` is matched.
// Lanes of null elements of `second` start out all-ones, every other lane starts at zero.
__m256i bitmask = has_second_null_map ?
_mm256_set_epi64x(
(second_null_map[j + 3])? full : none,
(second_null_map[j + 2])? full : none,
(second_null_map[j + 1])? full : none,
(second_null_map[j]) ? full : none)
: zeros;
size_t i = 0;
// Compare the block of `second` with four elements of `first` at a time;
// has_mask becomes non-zero as soon as all four lanes of `bitmask` are all-ones.
for (; i < first.size - 3 && !has_mask; has_mask = _mm256_testc_si256(bitmask, ones), i += 4)
{
const __m256i first_data = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(first.data + i));
// Widen four null-map bytes of `first` to 64-bit lanes (two bytes per 128-bit half).
// _mm_cvtepi8_epi64 sign-extends, so a non-zero null-map byte gives a lane with at least one bit set;
// the andnot below then clears that bit in a matching lane, so a match against a null element
// can never turn a lane of `bitmask` into all-ones.
// NOTE(review): each _mm_loadu_si128 reads 16 bytes although only the low 2 are used, so it can read
// past the first.size-th byte of the null map — presumably the buffer is padded; confirm.
const __m256i first_nm_mask = has_first_null_map?
_mm256_set_m128i(
_mm_cvtepi8_epi64(_mm_loadu_si128(reinterpret_cast<const __m128i *>(first_null_map + i + 2))),
_mm_cvtepi8_epi64(_mm_loadu_si128(reinterpret_cast<const __m128i *>(first_null_map + i))))
: zeros;
// OR together the comparisons of `second_data` with `first_data` and its three rotations by whole
// 64-bit lanes, each masked with the identically rotated null mask, so that every element of the
// block is compared with every element of `first_data`.
bitmask =
_mm256_or_si256(
_mm256_or_si256(
_mm256_or_si256(
_mm256_andnot_si256(
first_nm_mask,
_mm256_cmpeq_epi64(second_data, first_data)),
_mm256_andnot_si256(
_mm256_permutevar8x32_epi32(first_nm_mask, _mm256_set_epi32(5,4,3,2,1,0,7,6)),
_mm256_cmpeq_epi64(second_data, _mm256_permutevar8x32_epi32(first_data, _mm256_set_epi32(5,4,3,2,1,0,7,6))))),
_mm256_or_si256(
_mm256_andnot_si256(
_mm256_permutevar8x32_epi32(first_nm_mask, _mm256_set_epi32(3,2,1,0,7,6,5,4)),
_mm256_cmpeq_epi64(second_data, _mm256_permutevar8x32_epi32(first_data, _mm256_set_epi32(3,2,1,0,7,6,5,4)))),
_mm256_andnot_si256(
_mm256_permutevar8x32_epi32(first_nm_mask, _mm256_set_epi32(1,0,7,6,5,4,3,2)),
_mm256_cmpeq_epi64(second_data, _mm256_permutevar8x32_epi32(first_data, _mm256_set_epi32(1,0,7,6,5,4,3,2)))))),
bitmask);
}
// Scalar tail of `first`: broadcast each remaining non-null element to all lanes
// and compare it with the whole block of `second`.
if (i < first.size)
{
for (; i < first.size && !has_mask; ++i)
{
if (has_first_null_map && first_null_map[i])
continue;
__m256i v_i = _mm256_set1_epi64x(first.data[i]);
bitmask = _mm256_or_si256(bitmask, _mm256_cmpeq_epi64(second_data, v_i));
has_mask = _mm256_testc_si256(bitmask, ones);
}
}
}
}
// has_mask is only cleared inside the vector loop, so zero here means a block of `second` was not fully matched.
if (!has_mask && second.size > 3)
return false;
// Check the elements of `second` not covered by the vector loop (all of them if it was skipped).
return hasAllIntegralLoopRemainder(j, first, second, first_null_map, second_null_map);
}
// AVX2 Int32, UInt32 specialization.
// Returns true when every element of `second` can be matched in `first`:
//  - an empty `second` always matches;
//  - a null in `second` can only be matched if `first` contains at least one null
//    (otherwise the function returns false up front);
//  - a non-null element of `second` must be equal to some non-null element of `first`.
// `first_null_map` / `second_null_map` may be nullptr, meaning "no null map".
// `second` is processed eight elements per iteration; whatever the vector loop does not cover
// is delegated to hasAllIntegralLoopRemainder().
template<typename IntType>
requires (std::is_same_v<IntType, Int32> || std::is_same_v<IntType, UInt32>)
inline ALWAYS_INLINE bool sliceHasImplAnyAllImplInt32(
const NumericArraySlice<IntType> & first,
const NumericArraySlice<IntType> & second,
const UInt8 * first_null_map,
const UInt8 * second_null_map)
{
if (second.size == 0)
return true;
// A null in `second` cannot be matched when `first` holds no null at all.
if (!hasNull(first_null_map, first.size) && hasNull(second_null_map, second.size))
return false;
const bool has_first_null_map = first_null_map != nullptr;
const bool has_second_null_map = second_null_map != nullptr;
// j: index of the next element of `second` to check.
// has_mask: non-zero while every block of `second` checked so far was fully matched.
size_t j = 0;
int has_mask = 1;
static constexpr int full = -1, none = 0;
const __m256i ones = _mm256_set1_epi32(full);
const __m256i zeros = _mm256_setzero_si256();
// The vector path needs at least one full 8-element block in both slices.
// NOTE(review): presumably `size` is unsigned, in which case this guard also keeps `size - 7` from wrapping around — confirm.
if (second.size > 7 && first.size > 7)
{
for (; j < second.size - 7 && has_mask; j += 8)
{
has_mask = 0;
const __m256i second_data = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(second.data + j));
// A lane of `bitmask` being all-ones means the corresponding element of `second` is matched.
// Lanes of null elements of `second` start out all-ones, every other lane starts at zero.
__m256i bitmask = has_second_null_map ?
_mm256_set_epi32(
(second_null_map[j + 7]) ? full : none,
(second_null_map[j + 6]) ? full : none,
(second_null_map[j + 5]) ? full : none,
(second_null_map[j + 4]) ? full : none,
(second_null_map[j + 3]) ? full : none,
(second_null_map[j + 2]) ? full : none,
(second_null_map[j + 1]) ? full : none,
(second_null_map[j]) ? full : none)
: zeros;
size_t i = 0;
// Compare the block of `second` with eight elements of `first` at a time;
// has_mask becomes non-zero as soon as all eight lanes of `bitmask` are all-ones.
for (; i < first.size - 7 && !has_mask; has_mask = _mm256_testc_si256(bitmask, ones), i += 8)
{
const __m256i first_data = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(first.data + i));
// Build a mask that keeps null elements of `first` out of the comparison.
// _mm256_set_m128i takes (high half, low half); each half is four null-map bytes widened from 8 to 32 bits
// so that the lanes line up with the 32-bit comparisons below.
// _mm_cvtepi8_epi32 sign-extends, so a non-zero null-map byte gives a lane with at least one bit set;
// the andnot below then clears that bit in a matching lane, so a match against a null element
// can never turn a lane of `bitmask` into all-ones.
// NOTE(review): each _mm_loadu_si128 reads 16 bytes although only the low 4 are used, so it can read
// past the first.size-th byte of the null map — presumably the buffer is padded; confirm.
const __m256i first_nm_mask = has_first_null_map?
_mm256_set_m128i(
_mm_cvtepi8_epi32(_mm_loadu_si128(reinterpret_cast<const __m128i *>(first_null_map + i + 4))),
_mm_cvtepi8_epi32(_mm_loadu_si128(reinterpret_cast<const __m128i *>(first_null_map + i))))
: zeros;
// OR together the comparisons of `second_data` with `first_data` and its seven rotations by whole
// 32-bit lanes, each masked with the identically rotated null mask, so that every element of the
// block is compared with every element of `first_data`.
bitmask =
_mm256_or_si256(
_mm256_or_si256(
_mm256_or_si256(
_mm256_or_si256(
_mm256_andnot_si256(
first_nm_mask,
_mm256_cmpeq_epi32(second_data, first_data)),
_mm256_andnot_si256(
_mm256_permutevar8x32_epi32(first_nm_mask, _mm256_set_epi32(6,5,4,3,2,1,0,7)),
_mm256_cmpeq_epi32(second_data, _mm256_permutevar8x32_epi32(first_data, _mm256_set_epi32(6,5,4,3,2,1,0,7))))),
_mm256_or_si256(
_mm256_andnot_si256(
_mm256_permutevar8x32_epi32(first_nm_mask, _mm256_set_epi32(5,4,3,2,1,0,7,6)),
_mm256_cmpeq_epi32(second_data, _mm256_permutevar8x32_epi32(first_data, _mm256_set_epi32(5,4,3,2,1,0,7,6)))),
_mm256_andnot_si256(
_mm256_permutevar8x32_epi32(first_nm_mask, _mm256_set_epi32(4,3,2,1,0,7,6,5)),
_mm256_cmpeq_epi32(second_data, _mm256_permutevar8x32_epi32(first_data, _mm256_set_epi32(4,3,2,1,0,7,6,5)))))
),
_mm256_or_si256(
_mm256_or_si256(
_mm256_andnot_si256(
_mm256_permutevar8x32_epi32(first_nm_mask, _mm256_set_epi32(3,2,1,0,7,6,5,4)),
_mm256_cmpeq_epi32(second_data, _mm256_permutevar8x32_epi32(first_data, _mm256_set_epi32(3,2,1,0,7,6,5,4)))),
_mm256_andnot_si256(
_mm256_permutevar8x32_epi32(first_nm_mask, _mm256_set_epi32(2,1,0,7,6,5,4,3)),
_mm256_cmpeq_epi32(second_data, _mm256_permutevar8x32_epi32(first_data, _mm256_set_epi32(2,1,0,7,6,5,4,3))))),
_mm256_or_si256(
_mm256_andnot_si256(
_mm256_permutevar8x32_epi32(first_nm_mask, _mm256_set_epi32(1,0,7,6,5,4,3,2)),
_mm256_cmpeq_epi32(second_data, _mm256_permutevar8x32_epi32(first_data, _mm256_set_epi32(1,0,7,6,5,4,3,2)))),
_mm256_andnot_si256(
_mm256_permutevar8x32_epi32(first_nm_mask, _mm256_set_epi32(0,7,6,5,4,3,2,1)),
_mm256_cmpeq_epi32(second_data, _mm256_permutevar8x32_epi32(first_data, _mm256_set_epi32(0,7,6,5,4,3,2,1))))))),
bitmask);
}
// Scalar tail of `first`: broadcast each remaining non-null element to all lanes
// and compare it with the whole block of `second`.
if (i < first.size)
{
for (; i < first.size && !has_mask; ++i)
{
if (has_first_null_map && first_null_map[i])
continue;
__m256i v_i = _mm256_set1_epi32(first.data[i]);
bitmask = _mm256_or_si256(bitmask, _mm256_cmpeq_epi32(second_data, v_i));
has_mask = _mm256_testc_si256(bitmask, ones);
}
}
}
}
// has_mask is only cleared inside the vector loop, so zero here means a block of `second` was not fully matched.
if (!has_mask && second.size > 7)
return false;
// Check the elements of `second` not covered by the vector loop (all of them if it was skipped).
return hasAllIntegralLoopRemainder(j, first, second, first_null_map, second_null_map);
}
// AVX2 Int16, UInt16 specialization.
// Returns true when every element of `second` can be matched in `first`:
//  - an empty `second` always matches;
//  - a null in `second` can only be matched if `first` contains at least one null
//    (otherwise the function returns false up front);
//  - a non-null element of `second` must be equal to some non-null element of `first`.
// `first_null_map` / `second_null_map` may be nullptr, meaning "no null map".
// `second` is processed sixteen elements per iteration; whatever the vector loop does not cover
// is delegated to hasAllIntegralLoopRemainder().
template<typename IntType>
requires (std::is_same_v<IntType, Int16> || std::is_same_v<IntType, UInt16>)
inline ALWAYS_INLINE bool sliceHasImplAnyAllImplInt16(
const NumericArraySlice<IntType> & first,
const NumericArraySlice<IntType> & second,
const UInt8 * first_null_map,
const UInt8 * second_null_map)
{
if (second.size == 0)
return true;
// A null in `second` cannot be matched when `first` holds no null at all.
if (!hasNull(first_null_map, first.size) && hasNull(second_null_map, second.size))
return false;
const bool has_first_null_map = first_null_map != nullptr;
const bool has_second_null_map = second_null_map != nullptr;
// j: index of the next element of `second` to check.
// has_mask: non-zero while every block of `second` checked so far was fully matched.
size_t j = 0;
int has_mask = 1;
static constexpr int16_t full = -1, none = 0;
const __m256i ones = _mm256_set1_epi16(full);
const __m256i zeros = _mm256_setzero_si256();
// The vector path needs at least one full 16-element block in both slices.
// NOTE(review): presumably `size` is unsigned, in which case this guard also keeps `size - 15` from wrapping around — confirm.
if (second.size > 15 && first.size > 15)
{
for (; j < second.size - 15 && has_mask; j += 16)
{
has_mask = 0;
const __m256i second_data = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(second.data + j));
// A lane of `bitmask` being all-ones means the corresponding element of `second` is matched.
// Lanes of null elements of `second` start out all-ones, every other lane starts at zero.
__m256i bitmask = has_second_null_map ?
_mm256_set_epi16(
(second_null_map[j + 15]) ? full : none, (second_null_map[j + 14]) ? full : none,
(second_null_map[j + 13]) ? full : none, (second_null_map[j + 12]) ? full : none,
(second_null_map[j + 11]) ? full : none, (second_null_map[j + 10]) ? full : none,
(second_null_map[j + 9]) ? full : none, (second_null_map[j + 8])? full : none,
(second_null_map[j + 7]) ? full : none, (second_null_map[j + 6])? full : none,
(second_null_map[j + 5]) ? full : none, (second_null_map[j + 4])? full : none,
(second_null_map[j + 3]) ? full : none, (second_null_map[j + 2])? full : none,
(second_null_map[j + 1]) ? full : none, (second_null_map[j]) ? full : none)
: zeros;
size_t i = 0;
// Compare the block of `second` with sixteen elements of `first` at a time;
// has_mask becomes non-zero as soon as all sixteen lanes of `bitmask` are all-ones.
for (; i < first.size - 15 && !has_mask; has_mask = _mm256_testc_si256(bitmask, ones), i += 16)
{
const __m256i first_data = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(first.data + i));
// Widen sixteen null-map bytes of `first` to 16-bit lanes (eight bytes per 128-bit half).
// _mm_cvtepi8_epi16 sign-extends, so a non-zero null-map byte gives a lane with at least one bit set;
// the andnot below then clears that bit in a matching lane, so a match against a null element
// can never turn a lane of `bitmask` into all-ones.
// NOTE(review): each _mm_loadu_si128 reads 16 bytes although only the low 8 are used, so it can read
// past the first.size-th byte of the null map — presumably the buffer is padded; confirm.
const __m256i first_nm_mask = has_first_null_map?
_mm256_set_m128i(
_mm_cvtepi8_epi16(_mm_loadu_si128(reinterpret_cast<const __m128i *>(first_null_map + i + 8))),
_mm_cvtepi8_epi16(_mm_loadu_si128(reinterpret_cast<const __m128i *>(first_null_map + i))))
: zeros;
// OR together sixteen masked comparisons of `second_data` against rearrangements of `first_data`.
// NOTE(review): _mm256_shuffle_epi8 shuffles bytes inside each 128-bit half independently and uses only the
// low 4 bits of every index, so despite the 0..31 index constants each shuffle below rotates the eight
// 16-bit elements within each half. The first group therefore covers the eight in-half rotations; the second
// group repeats them after swapping the halves with _mm256_permute2x128_si256(x, x, 1). Together this appears
// to compare every element of the block with every element of `first_data` — verify against the intrinsics reference.
bitmask =
_mm256_or_si256(
_mm256_or_si256(
_mm256_or_si256(
_mm256_or_si256(
_mm256_or_si256(
_mm256_andnot_si256(
first_nm_mask,
_mm256_cmpeq_epi16(second_data, first_data)),
_mm256_andnot_si256(
_mm256_shuffle_epi8(first_nm_mask, _mm256_set_epi8(29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,31,30)),
_mm256_cmpeq_epi16(second_data, _mm256_shuffle_epi8(first_data, _mm256_set_epi8(29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,31,30))))),
_mm256_or_si256(
_mm256_andnot_si256(
_mm256_shuffle_epi8(first_nm_mask, _mm256_set_epi8(27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,31,30,29,28)),
_mm256_cmpeq_epi16(second_data, _mm256_shuffle_epi8(first_data, _mm256_set_epi8(27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,31,30,29,28)))),
_mm256_andnot_si256(
_mm256_shuffle_epi8(first_nm_mask, _mm256_set_epi8(25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,31,30,29,28,27,26)),
_mm256_cmpeq_epi16(second_data, _mm256_shuffle_epi8(first_data, _mm256_set_epi8(25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,31,30,29,28,27,26)))))
),
_mm256_or_si256(
_mm256_or_si256(
_mm256_andnot_si256(
_mm256_shuffle_epi8(first_nm_mask, _mm256_set_epi8(23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,31,30,29,28,27,26,25,24)),
_mm256_cmpeq_epi16(second_data, _mm256_shuffle_epi8(first_data, _mm256_set_epi8(23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,31,30,29,28,27,26,25,24)))),
_mm256_andnot_si256(
_mm256_shuffle_epi8(first_nm_mask, _mm256_set_epi8(21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,31,30,29,28,27,26,25,24,23,22)),
_mm256_cmpeq_epi16(second_data, _mm256_shuffle_epi8(first_data, _mm256_set_epi8(21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,31,30,29,28,27,26,25,24,23,22))))),
_mm256_or_si256(
_mm256_andnot_si256(
_mm256_shuffle_epi8(first_nm_mask, _mm256_set_epi8(19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,31,30,29,28,27,26,25,24,23,22,21,20)),
_mm256_cmpeq_epi16(second_data, _mm256_shuffle_epi8(first_data, _mm256_set_epi8(19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,31,30,29,28,27,26,25,24,23,22,21,20)))),
_mm256_andnot_si256(
_mm256_shuffle_epi8(first_nm_mask, _mm256_set_epi8(17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,31,30,29,28,27,26,25,24,23,22,21,20,19,18)),
_mm256_cmpeq_epi16(second_data, _mm256_shuffle_epi8(first_data, _mm256_set_epi8(17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,31,30,29,28,27,26,25,24,23,22,21,20,19,18))))))
),
_mm256_or_si256(
_mm256_or_si256(
_mm256_or_si256(
_mm256_andnot_si256(
_mm256_permute2x128_si256(first_nm_mask, first_nm_mask,1),
_mm256_cmpeq_epi16(second_data, _mm256_permute2x128_si256(first_data, first_data, 1))),
_mm256_andnot_si256(
_mm256_shuffle_epi8(_mm256_permute2x128_si256(first_nm_mask, first_nm_mask, 1), _mm256_set_epi8(13,12,11,10,9,8,7,6,5,4,3,2,1,0,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14)),
_mm256_cmpeq_epi16(second_data, _mm256_shuffle_epi8(_mm256_permute2x128_si256(first_data, first_data, 1), _mm256_set_epi8(13,12,11,10,9,8,7,6,5,4,3,2,1,0,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14))))),
_mm256_or_si256(
_mm256_andnot_si256(
_mm256_shuffle_epi8(_mm256_permute2x128_si256(first_nm_mask, first_nm_mask, 1), _mm256_set_epi8(11,10,9,8,7,6,5,4,3,2,1,0,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12)),
_mm256_cmpeq_epi16(second_data, _mm256_shuffle_epi8(_mm256_permute2x128_si256(first_data, first_data, 1), _mm256_set_epi8(11,10,9,8,7,6,5,4,3,2,1,0,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12)))),
_mm256_andnot_si256(
_mm256_shuffle_epi8(_mm256_permute2x128_si256(first_nm_mask, first_nm_mask, 1), _mm256_set_epi8(9,8,7,6,5,4,3,2,1,0,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10)),
_mm256_cmpeq_epi16(second_data, _mm256_shuffle_epi8(_mm256_permute2x128_si256(first_data, first_data, 1), _mm256_set_epi8(9,8,7,6,5,4,3,2,1,0,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10)))))
),
_mm256_or_si256(
_mm256_or_si256(
_mm256_andnot_si256(
_mm256_shuffle_epi8(_mm256_permute2x128_si256(first_nm_mask, first_nm_mask, 1), _mm256_set_epi8(7,6,5,4,3,2,1,0,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8)),
_mm256_cmpeq_epi16(second_data, _mm256_shuffle_epi8(_mm256_permute2x128_si256(first_data ,first_data, 1), _mm256_set_epi8(7,6,5,4,3,2,1,0,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8)))),
_mm256_andnot_si256(
_mm256_shuffle_epi8(_mm256_permute2x128_si256(first_nm_mask, first_nm_mask, 1), _mm256_set_epi8(5,4,3,2,1,0,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6)),
_mm256_cmpeq_epi16(second_data, _mm256_shuffle_epi8(_mm256_permute2x128_si256(first_data, first_data, 1), _mm256_set_epi8(5,4,3,2,1,0,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6))))),
_mm256_or_si256(
_mm256_andnot_si256(
_mm256_shuffle_epi8(_mm256_permute2x128_si256(first_nm_mask, first_nm_mask, 1), _mm256_set_epi8(3,2,1,0,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4)),
_mm256_cmpeq_epi16(second_data, _mm256_shuffle_epi8(_mm256_permute2x128_si256(first_data ,first_data ,1), _mm256_set_epi8(3,2,1,0,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4)))),
_mm256_andnot_si256(
_mm256_shuffle_epi8(_mm256_permute2x128_si256(first_nm_mask, first_nm_mask, 1), _mm256_set_epi8(1,0,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2)),
_mm256_cmpeq_epi16(second_data, _mm256_shuffle_epi8(_mm256_permute2x128_si256(first_data, first_data, 1), _mm256_set_epi8(1,0,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2))))))
)
),
bitmask);
}
// Scalar tail of `first`: broadcast each remaining non-null element to all lanes
// and compare it with the whole block of `second`.
if (i < first.size)
{
for (; i < first.size && !has_mask; ++i)
{
if (has_first_null_map && first_null_map[i])
continue;
__m256i v_i = _mm256_set1_epi16(first.data[i]);
bitmask = _mm256_or_si256(bitmask, _mm256_cmpeq_epi16(second_data, v_i));
has_mask = _mm256_testc_si256(bitmask, ones);
}
}
}
}
// has_mask is only cleared inside the vector loop, so zero here means a block of `second` was not fully matched.
if (!has_mask && second.size > 15)
return false;
// Check the elements of `second` not covered by the vector loop (all of them if it was skipped).
return hasAllIntegralLoopRemainder(j, first, second, first_null_map, second_null_map);
}
)
#endif
#if defined(__SSE4_2__)
DECLARE_SSE42_SPECIFIC_CODE (
// SSE4.2 Int64, UInt64 specialization (two 64-bit lanes per 128-bit register).
// Answers "is every element of `second` present in `first`?":
//  * an empty `second` is trivially contained;
//  * if hasNull reports a NULL in `second` but none in `first`, the answer is false;
//  * `second` is consumed two elements at a time; whatever was not consumed (everything,
//    when either slice has fewer than two elements) is delegated to
//    hasAllIntegralLoopRemainder together with the index `j` reached so far.
// NOTE(review): masking out NULL entries of `first` relies on all NULL flags sharing the
// same non-zero byte value (presumably 0/1) - confirm.
// NOTE(review): the null map of `first` is read with a full 16-byte load although only two
// bytes are used - confirm the buffer is readable that far past `first_null_map + i`.
template<typename IntType>
requires (std::is_same_v<IntType, Int64> || std::is_same_v<IntType, UInt64>)
inline ALWAYS_INLINE bool sliceHasImplAnyAllImplInt64(
    const NumericArraySlice<IntType> & first,
    const NumericArraySlice<IntType> & second,
    const UInt8 * first_null_map,
    const UInt8 * second_null_map)
{
    if (second.size == 0)
        return true;

    if (!hasNull(first_null_map, first.size) && hasNull(second_null_map, second.size))
        return false;

    const bool first_is_nullable = first_null_map != nullptr;
    const bool second_is_nullable = second_null_map != nullptr;

    static constexpr Int64 full = -1, none = 0;
    const __m128i zeros = _mm_setzero_si128();

    size_t j = 0;
    int has_mask = 1;

    if (second.size > 1 && first.size > 1)
    {
        while (j < second.size - 1 && has_mask)
        {
            has_mask = 0;

            const __m128i needles = _mm_loadu_si128(reinterpret_cast<const __m128i *>(second.data + j));

            // A lane that is all ones means "this element of `second` is accounted for".
            // NULL elements of `second` start out as accounted for.
            __m128i found = zeros;
            if (second_is_nullable)
                found = _mm_set_epi64x(
                    (second_null_map[j + 1]) ? full : none,
                    (second_null_map[j]) ? full : none);

            // Vectorised scan over `first`, two elements per step.
            size_t i = 0;
            while (i < first.size - 1 && !has_mask)
            {
                const __m128i hay = _mm_loadu_si128(reinterpret_cast<const __m128i *>(first.data + i));
                const __m128i hay_nulls = first_is_nullable
                    ? _mm_cvtepi8_epi64(_mm_loadu_si128(reinterpret_cast<const __m128i *>(first_null_map + i)))
                    : zeros;

                // Swapping the two 64-bit halves lets each needle meet both hay elements.
                const __m128i hay_swapped = _mm_shuffle_epi32(hay, _MM_SHUFFLE(1,0,3,2));
                const __m128i hay_nulls_swapped = _mm_shuffle_epi32(hay_nulls, _MM_SHUFFLE(1,0,3,2));

                const __m128i straight = _mm_andnot_si128(hay_nulls, _mm_cmpeq_epi64(needles, hay));
                const __m128i crossed = _mm_andnot_si128(hay_nulls_swapped, _mm_cmpeq_epi64(needles, hay_swapped));

                found = _mm_or_si128(_mm_or_si128(straight, crossed), found);

                has_mask = _mm_test_all_ones(found);
                i += 2;
            }

            // Scalar tail of `first`: broadcast one element at a time.
            for (; i < first.size && !has_mask; ++i)
            {
                if (!(first_is_nullable && first_null_map[i]))
                {
                    const __m128i broadcast = _mm_set1_epi64x(first.data[i]);
                    found = _mm_or_si128(found, _mm_cmpeq_epi64(needles, broadcast));
                    has_mask = _mm_test_all_ones(found);
                }
            }

            j += 2;
        }
    }

    if (!has_mask && second.size > 1)
        return false;

    return hasAllIntegralLoopRemainder(j, first, second, first_null_map, second_null_map);
}
// SSE4.2 Int32, UInt32 specialization (four 32-bit lanes per 128-bit register).
// Answers "is every element of `second` present in `first`?":
//  * an empty `second` is trivially contained;
//  * if hasNull reports a NULL in `second` but none in `first`, the answer is false;
//  * `second` is consumed four elements at a time; whatever was not consumed (everything,
//    when either slice has fewer than four elements) is delegated to
//    hasAllIntegralLoopRemainder together with the index `j` reached so far.
// NOTE(review): masking out NULL entries of `first` relies on all NULL flags sharing the
// same non-zero byte value (presumably 0/1) - confirm.
// NOTE(review): the null map of `first` is read with a full 16-byte load although only four
// bytes are used - confirm the buffer is readable that far past `first_null_map + i`.
template<typename IntType>
requires (std::is_same_v<IntType, Int32> || std::is_same_v<IntType, UInt32>)
inline ALWAYS_INLINE bool sliceHasImplAnyAllImplInt32(
    const NumericArraySlice<IntType> & first,
    const NumericArraySlice<IntType> & second,
    const UInt8 * first_null_map,
    const UInt8 * second_null_map)
{
    if (second.size == 0)
        return true;

    if (!hasNull(first_null_map, first.size) && hasNull(second_null_map, second.size))
        return false;

    const bool first_is_nullable = first_null_map != nullptr;
    const bool second_is_nullable = second_null_map != nullptr;

    static constexpr int full = -1, none = 0;
    const __m128i zeros = _mm_setzero_si128();

    size_t j = 0;
    int has_mask = 1;

    if (second.size > 3 && first.size > 3)
    {
        while (j < second.size - 3 && has_mask)
        {
            has_mask = 0;

            const __m128i needles = _mm_loadu_si128(reinterpret_cast<const __m128i *>(second.data + j));

            // A lane that is all ones means "this element of `second` is accounted for".
            // NULL elements of `second` start out as accounted for.
            __m128i found = zeros;
            if (second_is_nullable)
                found = _mm_set_epi32(
                    (second_null_map[j + 3]) ? full : none,
                    (second_null_map[j + 2]) ? full : none,
                    (second_null_map[j + 1]) ? full : none,
                    (second_null_map[j]) ? full : none);

            // Vectorised scan over `first`, four elements per step.
            size_t i = 0;
            while (i < first.size - 3 && !has_mask)
            {
                const __m128i hay = _mm_loadu_si128(reinterpret_cast<const __m128i *>(first.data + i));
                const __m128i hay_nulls = first_is_nullable
                    ? _mm_cvtepi8_epi32(_mm_loadu_si128(reinterpret_cast<const __m128i *>(first_null_map + i)))
                    : zeros;

                // Rotating the hay (and its null flags) by 1, 2 and 3 lanes lets every needle
                // meet every one of the four hay elements.
                const __m128i hay_rot1 = _mm_shuffle_epi32(hay, _MM_SHUFFLE(2,1,0,3));
                const __m128i hay_rot2 = _mm_shuffle_epi32(hay, _MM_SHUFFLE(1,0,3,2));
                const __m128i hay_rot3 = _mm_shuffle_epi32(hay, _MM_SHUFFLE(0,3,2,1));
                const __m128i nulls_rot1 = _mm_shuffle_epi32(hay_nulls, _MM_SHUFFLE(2,1,0,3));
                const __m128i nulls_rot2 = _mm_shuffle_epi32(hay_nulls, _MM_SHUFFLE(1,0,3,2));
                const __m128i nulls_rot3 = _mm_shuffle_epi32(hay_nulls, _MM_SHUFFLE(0,3,2,1));

                const __m128i match0 = _mm_andnot_si128(hay_nulls, _mm_cmpeq_epi32(needles, hay));
                const __m128i match1 = _mm_andnot_si128(nulls_rot1, _mm_cmpeq_epi32(needles, hay_rot1));
                const __m128i match2 = _mm_andnot_si128(nulls_rot2, _mm_cmpeq_epi32(needles, hay_rot2));
                const __m128i match3 = _mm_andnot_si128(nulls_rot3, _mm_cmpeq_epi32(needles, hay_rot3));

                found = _mm_or_si128(
                    _mm_or_si128(_mm_or_si128(match0, match1), _mm_or_si128(match2, match3)),
                    found);

                has_mask = _mm_test_all_ones(found);
                i += 4;
            }

            // Scalar tail of `first`: broadcast one element at a time.
            for (; i < first.size && !has_mask; ++i)
            {
                if (!(first_is_nullable && first_null_map[i]))
                {
                    const __m128i broadcast = _mm_set1_epi32(first.data[i]);
                    found = _mm_or_si128(found, _mm_cmpeq_epi32(needles, broadcast));
                    has_mask = _mm_test_all_ones(found);
                }
            }

            j += 4;
        }
    }

    if (!has_mask && second.size > 3)
        return false;

    return hasAllIntegralLoopRemainder(j, first, second, first_null_map, second_null_map);
}
// SSE4.2 Int16, UInt16 specialization (eight 16-bit lanes per 128-bit register).
// Answers "is every element of `second` present in `first`?":
//  * an empty `second` is trivially contained;
//  * if hasNull reports a NULL in `second` but none in `first`, the answer is false;
//  * `second` is consumed eight elements at a time; whatever was not consumed (everything,
//    when either slice has fewer than eight elements) is delegated to
//    hasAllIntegralLoopRemainder together with the index `j` reached so far.
// The size guards use `> 7` (lanes - 1), matching the `size - 7` loop bounds and the
// convention of the other width specializations.
// NOTE(review): masking out NULL entries of `first` relies on all NULL flags sharing the
// same non-zero byte value (presumably 0/1) - confirm.
// NOTE(review): the null map of `first` is read with a full 16-byte load although only eight
// bytes are used - confirm the buffer is readable that far past `first_null_map + i`.
template<typename IntType>
requires (std::is_same_v<IntType, Int16> || std::is_same_v<IntType, UInt16>)
inline ALWAYS_INLINE bool sliceHasImplAnyAllImplInt16(
    const NumericArraySlice<IntType> & first,
    const NumericArraySlice<IntType> & second,
    const UInt8 * first_null_map,
    const UInt8 * second_null_map)
{
    if (second.size == 0)
        return true;
    if (!hasNull(first_null_map, first.size) && hasNull(second_null_map, second.size))
        return false;
    const bool has_first_null_map = first_null_map != nullptr;
    const bool has_second_null_map = second_null_map != nullptr;
    size_t j = 0;
    int has_mask = 1;
    static constexpr int16_t full = -1, none = 0;
    const __m128i zeros = _mm_setzero_si128();
    if (second.size > 7 && first.size > 7)
    {
        for (; j < second.size - 7 && has_mask; j += 8)
        {
            has_mask = 0;
            const __m128i second_data = _mm_loadu_si128(reinterpret_cast<const __m128i *>(second.data + j));
            // A lane that is all ones means "this element of `second` is accounted for".
            // NULL elements of `second` start out as accounted for.
            __m128i bitmask = has_second_null_map ?
                _mm_set_epi16(
                    (second_null_map[j + 7]) ? full : none, (second_null_map[j + 6]) ? full : none,
                    (second_null_map[j + 5]) ? full : none, (second_null_map[j + 4]) ? full : none,
                    (second_null_map[j + 3]) ? full : none, (second_null_map[j + 2]) ? full : none,
                    (second_null_map[j + 1]) ? full : none, (second_null_map[j]) ? full: none)
                : zeros;
            size_t i = 0;
            // Vectorised scan over `first`, eight elements per step. The block of `first` (and its
            // null flags) is compared as-is and in each of its seven lane rotations, so every
            // element of `second` meets every element of the block.
            for (; i < first.size-7 && !has_mask; has_mask = _mm_test_all_ones(bitmask), i += 8)
            {
                const __m128i first_data = _mm_loadu_si128(reinterpret_cast<const __m128i *>(first.data + i));
                const __m128i first_nm_mask = has_first_null_map ?
                    _mm_cvtepi8_epi16(_mm_loadu_si128(reinterpret_cast<const __m128i *>(first_null_map + i)))
                    : zeros;
                bitmask =
                    _mm_or_si128(
                        _mm_or_si128(
                            _mm_or_si128(
                                _mm_or_si128(
                                    _mm_andnot_si128(
                                        first_nm_mask,
                                        _mm_cmpeq_epi16(second_data, first_data)),
                                    _mm_andnot_si128(
                                        _mm_shuffle_epi8(first_nm_mask, _mm_set_epi8(13,12,11,10,9,8,7,6,5,4,3,2,1,0,15,14)),
                                        _mm_cmpeq_epi16(second_data, _mm_shuffle_epi8(first_data, _mm_set_epi8(13,12,11,10,9,8,7,6,5,4,3,2,1,0,15,14))))),
                                _mm_or_si128(
                                    _mm_andnot_si128(
                                        _mm_shuffle_epi8(first_nm_mask, _mm_set_epi8(11,10,9,8,7,6,5,4,3,2,1,0,15,14,13,12)),
                                        _mm_cmpeq_epi16(second_data, _mm_shuffle_epi8(first_data, _mm_set_epi8(11,10,9,8,7,6,5,4,3,2,1,0,15,14,13,12)))),
                                    _mm_andnot_si128(
                                        _mm_shuffle_epi8(first_nm_mask, _mm_set_epi8(9,8,7,6,5,4,3,2,1,0,15,14,13,12,11,10)),
                                        _mm_cmpeq_epi16(second_data, _mm_shuffle_epi8(first_data, _mm_set_epi8(9,8,7,6,5,4,3,2,1,0,15,14,13,12,11,10)))))
                            ),
                            _mm_or_si128(
                                _mm_or_si128(
                                    _mm_andnot_si128(
                                        _mm_shuffle_epi8(first_nm_mask, _mm_set_epi8(7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8)),
                                        _mm_cmpeq_epi16(second_data, _mm_shuffle_epi8(first_data, _mm_set_epi8(7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8)))),
                                    _mm_andnot_si128(
                                        _mm_shuffle_epi8(first_nm_mask, _mm_set_epi8(5,4,3,2,1,0,15,14,13,12,11,10,9,8,7,6)),
                                        _mm_cmpeq_epi16(second_data, _mm_shuffle_epi8(first_data, _mm_set_epi8(5,4,3,2,1,0,15,14,13,12,11,10,9,8,7,6))))),
                                _mm_or_si128(
                                    _mm_andnot_si128(
                                        _mm_shuffle_epi8(first_nm_mask, _mm_set_epi8(3,2,1,0,15,14,13,12,11,10,9,8,7,6,5,4)),
                                        _mm_cmpeq_epi16(second_data, _mm_shuffle_epi8(first_data, _mm_set_epi8(3,2,1,0,15,14,13,12,11,10,9,8,7,6,5,4)))),
                                    _mm_andnot_si128(
                                        _mm_shuffle_epi8(first_nm_mask, _mm_set_epi8(1,0,15,14,13,12,11,10,9,8,7,6,5,4,3,2)),
                                        _mm_cmpeq_epi16(second_data, _mm_shuffle_epi8(first_data, _mm_set_epi8(1,0,15,14,13,12,11,10,9,8,7,6,5,4,3,2))))))
                        ),
                        bitmask);
            }
            // Scalar tail of `first`: broadcast one non-NULL element at a time.
            if (i < first.size)
            {
                for (; i < first.size && !has_mask; ++i)
                {
                    if (has_first_null_map && first_null_map[i])
                        continue;
                    __m128i v_i = _mm_set1_epi16(first.data[i]);
                    bitmask = _mm_or_si128(bitmask, _mm_cmpeq_epi16(second_data, v_i));
                    has_mask = _mm_test_all_ones(bitmask);
                }
            }
        }
    }
    // has_mask can only be 0 here if the vector loop ran and left some lane unmatched.
    if (!has_mask && second.size > 7)
        return false;
    return hasAllIntegralLoopRemainder(j, first, second, first_null_map, second_null_map);
}
// Int8/UInt8 version is faster with SSE than with AVX2
// SSE4.2 Int8, UInt8 specialization (sixteen 8-bit lanes per 128-bit register).
// Answers "is every element of `second` present in `first`?":
//  * an empty `second` is trivially contained;
//  * if hasNull reports a NULL in `second` but none in `first`, the answer is false;
//  * `second` is consumed sixteen elements at a time; whatever was not consumed (everything,
//    when either slice has fewer than sixteen elements) is delegated to
//    hasAllIntegralLoopRemainder together with the index `j` reached so far.
// NOTE(review): masking out NULL entries of `first` relies on all NULL flags sharing the
// same non-zero byte value (presumably 0/1) - confirm.
template<typename IntType>
requires (std::is_same_v<IntType, Int8> || std::is_same_v<IntType, UInt8>)
inline ALWAYS_INLINE bool sliceHasImplAnyAllImplInt8(
const NumericArraySlice<IntType> & first,
const NumericArraySlice<IntType> & second,
const UInt8 * first_null_map,
const UInt8 * second_null_map)
{
if (second.size == 0)
return true;
if (!hasNull(first_null_map, first.size) && hasNull(second_null_map, second.size))
return false;
const bool has_first_null_map = first_null_map != nullptr;
const bool has_second_null_map = second_null_map != nullptr;
// Index of the next unprocessed element of `second`.
size_t j = 0;
// Non-zero while every processed block of `second` was fully matched.
int has_mask = 1;
static constexpr int8_t full = -1, none = 0;
const __m128i zeros = _mm_setzero_si128();
if (second.size > 15 && first.size > 15)
{
for (; j < second.size - 15 && has_mask; j += 16)
{
has_mask = 0;
const __m128i second_data = _mm_loadu_si128(reinterpret_cast<const __m128i *>(second.data + j));
// A lane that is all ones means "this element of `second` is accounted for".
// NULL elements of `second` start out as accounted for.
__m128i bitmask = has_second_null_map ?
_mm_set_epi8(
(second_null_map[j + 15]) ? full : none, (second_null_map[j + 14]) ? full : none,
(second_null_map[j + 13]) ? full : none, (second_null_map[j + 12]) ? full : none,
(second_null_map[j + 11]) ? full : none, (second_null_map[j + 10]) ? full : none,
(second_null_map[j + 9]) ? full : none, (second_null_map[j + 8]) ? full : none,
(second_null_map[j + 7]) ? full : none, (second_null_map[j + 6]) ? full : none,
(second_null_map[j + 5]) ? full : none, (second_null_map[j + 4]) ? full : none,
(second_null_map[j + 3]) ? full : none, (second_null_map[j + 2]) ? full : none,
(second_null_map[j + 1]) ? full : none, (second_null_map[j]) ? full : none)
: zeros;
size_t i = 0;
// Vectorised scan over `first`, sixteen elements per step; the loop stops early once
// every lane of `bitmask` is all ones.
for (; i < first.size - 15 && !has_mask; has_mask = _mm_test_all_ones(bitmask), i += 16)
{
const __m128i first_data = _mm_loadu_si128(reinterpret_cast<const __m128i *>(first.data + i));
const __m128i first_nm_mask = has_first_null_map ?
_mm_loadu_si128(reinterpret_cast<const __m128i *>(first_null_map + i))
: zeros;
// The block of `first` (and its null flags) is compared as-is and in each of its fifteen
// byte rotations, so every element of `second` meets every element of the block.
// `andnot` clears the null-flag bits from each comparison result, and all partial
// results are OR-ed into `bitmask`.
bitmask =
_mm_or_si128(
_mm_or_si128(
_mm_or_si128(
_mm_or_si128(
_mm_or_si128(
_mm_andnot_si128(
first_nm_mask,
_mm_cmpeq_epi8(second_data, first_data)),
_mm_andnot_si128(
_mm_shuffle_epi8(first_nm_mask, _mm_set_epi8(14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,15)),
_mm_cmpeq_epi8(second_data, _mm_shuffle_epi8(first_data, _mm_set_epi8(14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,15))))),
_mm_or_si128(
_mm_andnot_si128(
_mm_shuffle_epi8(first_nm_mask, _mm_set_epi8(13,12,11,10,9,8,7,6,5,4,3,2,1,0,15,14)),
_mm_cmpeq_epi8(second_data, _mm_shuffle_epi8(first_data, _mm_set_epi8(13,12,11,10,9,8,7,6,5,4,3,2,1,0,15,14)))),
_mm_andnot_si128(
_mm_shuffle_epi8(first_nm_mask, _mm_set_epi8(12,11,10,9,8,7,6,5,4,3,2,1,0,15,14,13)),
_mm_cmpeq_epi8(second_data, _mm_shuffle_epi8(first_data, _mm_set_epi8(12,11,10,9,8,7,6,5,4,3,2,1,0,15,14,13)))))
),
_mm_or_si128(
_mm_or_si128(
_mm_andnot_si128(
_mm_shuffle_epi8(first_nm_mask, _mm_set_epi8(11,10,9,8,7,6,5,4,3,2,1,0,15,14,13,12)),
_mm_cmpeq_epi8(second_data, _mm_shuffle_epi8(first_data, _mm_set_epi8(11,10,9,8,7,6,5,4,3,2,1,0,15,14,13,12)))),
_mm_andnot_si128(
_mm_shuffle_epi8(first_nm_mask, _mm_set_epi8(10,9,8,7,6,5,4,3,2,1,0,15,14,13,12,11)),
_mm_cmpeq_epi8(second_data, _mm_shuffle_epi8(first_data, _mm_set_epi8(10,9,8,7,6,5,4,3,2,1,0,15,14,13,12,11))))),
_mm_or_si128(
_mm_andnot_si128(
_mm_shuffle_epi8(first_nm_mask, _mm_set_epi8(9,8,7,6,5,4,3,2,1,0,15,14,13,12,11,10)),
_mm_cmpeq_epi8(second_data, _mm_shuffle_epi8(first_data, _mm_set_epi8(9,8,7,6,5,4,3,2,1,0,15,14,13,12,11,10)))),
_mm_andnot_si128(
_mm_shuffle_epi8(first_nm_mask, _mm_set_epi8(8,7,6,5,4,3,2,1,0,15,14,13,12,11,10,9)),
_mm_cmpeq_epi8(second_data, _mm_shuffle_epi8(first_data, _mm_set_epi8(8,7,6,5,4,3,2,1,0,15,14,13,12,11,10,9))))))),
_mm_or_si128(
_mm_or_si128(
_mm_or_si128(
_mm_andnot_si128(
_mm_shuffle_epi8(first_nm_mask, _mm_set_epi8(7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8)),
_mm_cmpeq_epi8(second_data, _mm_shuffle_epi8(first_data, _mm_set_epi8(7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8)))),
_mm_andnot_si128(
_mm_shuffle_epi8(first_nm_mask, _mm_set_epi8(6,5,4,3,2,1,0,15,14,13,12,11,10,9,8,7)),
_mm_cmpeq_epi8(second_data, _mm_shuffle_epi8(first_data, _mm_set_epi8(6,5,4,3,2,1,0,15,14,13,12,11,10,9,8,7))))),
_mm_or_si128(
_mm_andnot_si128(
_mm_shuffle_epi8(first_nm_mask, _mm_set_epi8(5,4,3,2,1,0,15,14,13,12,11,10,9,8,7,6)),
_mm_cmpeq_epi8(second_data, _mm_shuffle_epi8(first_data, _mm_set_epi8(5,4,3,2,1,0,15,14,13,12,11,10,9,8,7,6)))),
_mm_andnot_si128(
_mm_shuffle_epi8(first_nm_mask, _mm_set_epi8(4,3,2,1,0,15,14,13,12,11,10,9,8,7,6,5)),
_mm_cmpeq_epi8(second_data, _mm_shuffle_epi8(first_data, _mm_set_epi8(4,3,2,1,0,15,14,13,12,11,10,9,8,7,6,5)))))),
_mm_or_si128(
_mm_or_si128(
_mm_andnot_si128(
_mm_shuffle_epi8(first_nm_mask, _mm_set_epi8(3,2,1,0,15,14,13,12,11,10,9,8,7,6,5,4)),
_mm_cmpeq_epi8(second_data, _mm_shuffle_epi8(first_data, _mm_set_epi8(3,2,1,0,15,14,13,12,11,10,9,8,7,6,5,4)))),
_mm_andnot_si128(
_mm_shuffle_epi8(first_nm_mask, _mm_set_epi8(2,1,0,15,14,13,12,11,10,9,8,7,6,5,4,3)),
_mm_cmpeq_epi8(second_data, _mm_shuffle_epi8(first_data, _mm_set_epi8(2,1,0,15,14,13,12,11,10,9,8,7,6,5,4,3))))),
_mm_or_si128(
_mm_andnot_si128(
_mm_shuffle_epi8(first_nm_mask, _mm_set_epi8(1,0,15,14,13,12,11,10,9,8,7,6,5,4,3,2)),
_mm_cmpeq_epi8(second_data, _mm_shuffle_epi8(first_data, _mm_set_epi8(1,0,15,14,13,12,11,10,9,8,7,6,5,4,3,2)))),
_mm_andnot_si128(
_mm_shuffle_epi8(first_nm_mask, _mm_set_epi8(0,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1)),
_mm_cmpeq_epi8(second_data, _mm_shuffle_epi8(first_data, _mm_set_epi8(0,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1)))))))),
bitmask);
}
// Scalar tail of `first`: broadcast one non-NULL element at a time.
if (i < first.size)
{
for (; i < first.size && !has_mask; ++i)
{
if (has_first_null_map && first_null_map[i])
continue;
__m128i v_i = _mm_set1_epi8(first.data[i]);
bitmask = _mm_or_si128(bitmask, _mm_cmpeq_epi8(second_data, v_i));
has_mask = _mm_test_all_ones(bitmask);
}
}
}
}
// has_mask can only be 0 here if the vector loop ran and left some lane unmatched.
if (!has_mask && second.size > 15)
return false;
return hasAllIntegralLoopRemainder(j, first, second, first_null_map, second_null_map);
}
)
#endif
/// Scalar fallback that compares every non-NULL element of `second` against every
/// non-NULL element of `first` using `isEqual`.
///  * ArraySearchType::Any: true as soon as one element of `second` is found in `first`
///    (or both sides contain a NULL), false otherwise.
///  * ArraySearchType::All: false as soon as one element of `second` is missing from `first`
///    (or `second` contains a NULL while `first` does not), true otherwise.
/// Either null map may be nullptr, in which case no element of that slice is skipped.
template <
    ArraySearchType search_type,
    typename FirstSliceType,
    typename SecondSliceType,
    bool (*isEqual)(const FirstSliceType &, const SecondSliceType &, size_t, size_t)>
bool sliceHasImplAnyAllGenericImpl(const FirstSliceType & first, const SecondSliceType & second, const UInt8 * first_null_map, const UInt8 * second_null_map)
{
    constexpr bool searching_any = search_type == ArraySearchType::Any;
    constexpr bool searching_all = search_type == ArraySearchType::All;

    /// NULLs are matched only against NULLs, so they can be settled up front.
    if (hasNull(second_null_map, second.size))
    {
        const bool first_contains_null = hasNull(first_null_map, first.size);

        if (searching_any && first_contains_null)
            return true;

        if (searching_all && !first_contains_null)
            return false;
    }

    for (size_t needle = 0; needle < second.size; ++needle)
    {
        if (second_null_map != nullptr && second_null_map[needle])
            continue;

        bool found = false;
        for (size_t candidate = 0; candidate < first.size; ++candidate)
        {
            if (first_null_map != nullptr && first_null_map[candidate])
                continue;

            if (isEqual(first, second, candidate, needle))
            {
                found = true;
                break;
            }
        }

        if (found && searching_any)
            return true;

        if (!found && searching_all)
            return false;
    }

    return searching_all;
}
/// Methods to check if first array has elements from second array, overloaded for various combinations of types.
///
/// Dispatches to a SIMD kernel when all of the following hold: multitarget code is enabled,
/// the search type is ArraySearchType::All, both slices have the same type, and that type is a
/// NumericArraySlice of an 8/16/32/64-bit integer. AVX2 kernels (16/32/64-bit only) are tried
/// first, then the SSE4.2 kernels; everything else goes to sliceHasImplAnyAllGenericImpl.
///
/// Each kernel family is referenced only under the same preprocessor guard it is declared
/// under (`__AVX2__` / `__SSE4_2__`), so a build without one of them still compiles and
/// falls back to the next option.
template <
    ArraySearchType search_type,
    typename FirstSliceType,
    typename SecondSliceType,
    bool (*isEqual)(const FirstSliceType &, const SecondSliceType &, size_t, size_t)>
inline ALWAYS_INLINE bool sliceHasImplAnyAll(const FirstSliceType & first, const SecondSliceType & second, const UInt8 * first_null_map, const UInt8 * second_null_map)
{
#if USE_MULTITARGET_CODE
    if constexpr (search_type == ArraySearchType::All && std::is_same_v<FirstSliceType, SecondSliceType>)
    {
#if defined(__AVX2__)
        if (isArchSupported(TargetArch::AVX2))
        {
            /// There is no AVX2 kernel for 8-bit integers; those fall through to SSE4.2 below.
            if constexpr (std::is_same_v<FirstSliceType, NumericArraySlice<Int16>> || std::is_same_v<FirstSliceType, NumericArraySlice<UInt16>>)
            {
                return GatherUtils::TargetSpecific::AVX2::sliceHasImplAnyAllImplInt16(first, second, first_null_map, second_null_map);
            }
            else if constexpr (std::is_same_v<FirstSliceType, NumericArraySlice<Int32>> || std::is_same_v<FirstSliceType, NumericArraySlice<UInt32>>)
            {
                return GatherUtils::TargetSpecific::AVX2::sliceHasImplAnyAllImplInt32(first, second, first_null_map, second_null_map);
            }
            else if constexpr (std::is_same_v<FirstSliceType, NumericArraySlice<Int64>> || std::is_same_v<FirstSliceType, NumericArraySlice<UInt64>>)
            {
                return GatherUtils::TargetSpecific::AVX2::sliceHasImplAnyAllImplInt64(first, second, first_null_map, second_null_map);
            }
        }
#endif
#if defined(__SSE4_2__)
        if (isArchSupported(TargetArch::SSE42))
        {
            if constexpr (std::is_same_v<FirstSliceType, NumericArraySlice<Int8>> || std::is_same_v<FirstSliceType, NumericArraySlice<UInt8>>)
            {
                return GatherUtils::TargetSpecific::SSE42::sliceHasImplAnyAllImplInt8(first, second, first_null_map, second_null_map);
            }
            else if constexpr (std::is_same_v<FirstSliceType, NumericArraySlice<Int16>> || std::is_same_v<FirstSliceType, NumericArraySlice<UInt16>>)
            {
                return GatherUtils::TargetSpecific::SSE42::sliceHasImplAnyAllImplInt16(first, second, first_null_map, second_null_map);
            }
            else if constexpr (std::is_same_v<FirstSliceType, NumericArraySlice<Int32>> || std::is_same_v<FirstSliceType, NumericArraySlice<UInt32>>)
            {
                return GatherUtils::TargetSpecific::SSE42::sliceHasImplAnyAllImplInt32(first, second, first_null_map, second_null_map);
            }
            else if constexpr (std::is_same_v<FirstSliceType, NumericArraySlice<Int64>> || std::is_same_v<FirstSliceType, NumericArraySlice<UInt64>>)
            {
                return GatherUtils::TargetSpecific::SSE42::sliceHasImplAnyAllImplInt64(first, second, first_null_map, second_null_map);
            }
        }
#endif
    }
#endif
    return sliceHasImplAnyAllGenericImpl<search_type, FirstSliceType, SecondSliceType, isEqual>(first, second, first_null_map, second_null_map);
}
}

View File

@ -1,8 +1,8 @@
#pragma once #pragma once
#include <Functions/TargetSpecific.h>
#include <Functions/IFunction.h> #include <Functions/IFunction.h>
#include <Common/TargetSpecific.h>
#include <Common/Stopwatch.h> #include <Common/Stopwatch.h>
#include <Interpreters/Context.h> #include <Interpreters/Context.h>

View File

@ -6,8 +6,8 @@
#include <Functions/IFunction.h> #include <Functions/IFunction.h>
#include <Functions/FunctionHelpers.h> #include <Functions/FunctionHelpers.h>
#include <Functions/FunctionFactory.h> #include <Functions/FunctionFactory.h>
#include <Functions/TargetSpecific.h>
#include <Functions/PerformanceAdaptors.h> #include <Functions/PerformanceAdaptors.h>
#include <Common/TargetSpecific.h>
#include <base/range.h> #include <base/range.h>
#include <cmath> #include <cmath>

View File

@ -0,0 +1,139 @@
#include <random>
#include <gtest/gtest.h>
#include <Functions/GatherUtils/Algorithms.h>
using namespace DB::GatherUtils;
/// Builds a uniform integer distribution over the closed range [min, max] together with a
/// Mersenne Twister engine freshly seeded from std::random_device.
/// Returns them as a (distribution, engine) pair; draw a value with `dist(engine)`.
auto uni_int_dist(int min, int max)
{
    std::random_device seed_source;
    std::mt19937 engine(seed_source());
    return std::make_pair(std::uniform_int_distribution<>(min, max), engine);
}
/// Prepares the input of a hasAll test.
///  * `array_elements[0 .. array_size)` is filled with 0, 1, 2, ... (converted to T).
///  * `elements_to_have[0 .. nb_elements_to_have)` is filled with values drawn at random
///    (with repetition) from `array_elements`.
///  * When `all_elements_present` is false, the last element to search for is overwritten
///    with `array_size + 1`, a value that was not written into `array_elements`.
/// Edge cases: with `nb_elements_to_have == 0` nothing is written to `elements_to_have`;
/// with `array_size == 0` there is nothing to draw from, so the random fill is skipped.
template<class T>
void arrayInit(T* elements_to_have, size_t nb_elements_to_have, T* array_elements, size_t array_size, bool all_elements_present)
{
    for (size_t i = 0; i < array_size; ++i)
    {
        array_elements[i] = i;
    }

    /// Without this guard `nb_elements_to_have - 1` below would wrap around.
    if (nb_elements_to_have == 0)
        return;

    /// A distribution over [0, array_size - 1] is only valid for a non-empty array.
    if (array_size != 0)
    {
        auto [dist, gen] = uni_int_dist(0, array_size - 1);
        for (size_t i = 0; i < nb_elements_to_have; ++i)
        {
            elements_to_have[i] = array_elements[dist(gen)];
        }
    }

    if (!all_elements_present)
    {
        /// make one element to be searched for missing from the target array
        elements_to_have[nb_elements_to_have - 1] = array_size + 1;
    }
}
/// Clears `null_map[0 .. null_map_size)` and then marks up to `nb_null_elements` randomly
/// chosen positions as NULL (1). The last position is never chosen, and positions may
/// repeat, so fewer than `nb_null_elements` entries can end up set.
/// A map with fewer than two entries is only cleared: there is no position that could be
/// marked while keeping the last one non-null.
void nullMapInit(UInt8 * null_map, size_t null_map_size, size_t nb_null_elements)
{
    for (size_t i = 0; i < null_map_size; ++i)
    {
        null_map[i] = 0;
    }

    /// Guards both the distribution range [0, null_map_size - 2] and the
    /// `null_map_size - 1` loop bound against wrapping around.
    if (null_map_size < 2)
        return;

    /// -2 to keep the last element of the array non-null
    auto [dist, gen] = uni_int_dist(0, null_map_size - 2);
    for (size_t i = 0; i < null_map_size - 1 && i < nb_null_elements; ++i)
    {
        null_map[dist(gen)] = 1;
    }
}
template<class T>
bool testHasAll(size_t nb_elements_to_have, size_t array_size, bool with_null_maps, bool all_elements_present)
{
auto array_elements = std::make_unique<T[]>(array_size);
auto elements_to_have = std::make_unique<T[]>(nb_elements_to_have);
std::unique_ptr<UInt8[]> first_nm = nullptr, second_nm = nullptr;
if (with_null_maps)
{
first_nm = std::make_unique<UInt8[]>(array_size);
second_nm = std::make_unique<UInt8[]>(nb_elements_to_have);
/// add a null to elements to have, but not to the target array, making the answer negative
nullMapInit(first_nm.get(), array_size, 0);
nullMapInit(second_nm.get(), nb_elements_to_have, 1);
}
arrayInit(elements_to_have.get(), nb_elements_to_have, array_elements.get(), array_size, all_elements_present);
NumericArraySlice<T> first = {array_elements.get(), array_size};
NumericArraySlice<T> second = {elements_to_have.get(), nb_elements_to_have};
/// check whether all elements of the second array are also elements of the first array, overloaded for various combinations of types.
return sliceHasImplAnyAll<ArraySearchType::All, NumericArraySlice<T>, NumericArraySlice<T>, sliceEqualElements<T,T> >(
first, second, first_nm.get(), second_nm.get());
}
/// hasAll on `int` slices without null maps: a small and a large input, each once with all
/// looked-up elements present and once with one of them missing.
TEST(HasAll, integer)
{
    const bool small_all_present = testHasAll<int>(4, 100, false, true);
    const bool small_one_missing = testHasAll<int>(4, 100, false, false);
    const bool large_all_present = testHasAll<int>(100, 4096, false, true);
    const bool large_one_missing = testHasAll<int>(100, 4096, false, false);

    ASSERT_TRUE(small_all_present);
    ASSERT_FALSE(small_one_missing);
    ASSERT_TRUE(large_all_present);
    ASSERT_FALSE(large_one_missing);
}
/// hasAll on `int64_t` slices without null maps: a small and a large input, each once with
/// all looked-up elements present and once with one of them missing.
TEST(HasAll, int64)
{
    const bool small_all_present = testHasAll<int64_t>(2, 100, false, true);
    const bool small_one_missing = testHasAll<int64_t>(2, 100, false, false);
    const bool large_all_present = testHasAll<int64_t>(100, 4096, false, true);
    const bool large_one_missing = testHasAll<int64_t>(100, 4096, false, false);

    ASSERT_TRUE(small_all_present);
    ASSERT_FALSE(small_one_missing);
    ASSERT_TRUE(large_all_present);
    ASSERT_FALSE(large_one_missing);
}
/// hasAll on `int16_t` slices without null maps: a small and a large input, each once with
/// all looked-up elements present and once with one of them missing.
TEST(HasAll, int16)
{
    const bool small_all_present = testHasAll<int16_t>(2, 100, false, true);
    const bool small_one_missing = testHasAll<int16_t>(2, 100, false, false);
    const bool large_all_present = testHasAll<int16_t>(100, 4096, false, true);
    const bool large_one_missing = testHasAll<int16_t>(100, 4096, false, false);

    ASSERT_TRUE(small_all_present);
    ASSERT_FALSE(small_one_missing);
    ASSERT_TRUE(large_all_present);
    ASSERT_FALSE(large_one_missing);
}
/// hasAll on `int8_t` slices without null maps. Array sizes stay at or below 125 so that both
/// the generated values and the "missing" value (array_size + 1) fit into int8_t.
TEST(HasAll, int8)
{
    const bool small_all_present = testHasAll<int8_t>(2, 100, false, true);
    const bool small_one_missing = testHasAll<int8_t>(2, 100, false, false);
    const bool large_all_present = testHasAll<int8_t>(50, 125, false, true);
    const bool large_one_missing = testHasAll<int8_t>(50, 125, false, false);

    ASSERT_TRUE(small_all_present);
    ASSERT_FALSE(small_one_missing);
    ASSERT_TRUE(large_all_present);
    ASSERT_FALSE(large_one_missing);
}
/// With null maps enabled the looked-up elements contain a NULL while the searched array
/// contains none, so hasAll must be false for every element width even though all
/// looked-up values are present.
TEST(HasAllSingleNullElement, all)
{
    const bool result_int32 = testHasAll<int>(4, 100, true, true);
    const bool result_int64 = testHasAll<int64_t>(4, 100, true, true);
    const bool result_int16 = testHasAll<int16_t>(4, 100, true, true);
    const bool result_int8 = testHasAll<int8_t>(4, 100, true, true);

    ASSERT_FALSE(result_int32);
    ASSERT_FALSE(result_int64);
    ASSERT_FALSE(result_int16);
    ASSERT_FALSE(result_int8);
}

View File

@ -240,7 +240,7 @@ void ReadBufferFromS3::setReadUntilPosition(size_t position)
SeekableReadBuffer::Range ReadBufferFromS3::getRemainingReadRange() const SeekableReadBuffer::Range ReadBufferFromS3::getRemainingReadRange() const
{ {
return Range{.left = static_cast<size_t>(offset), .right = read_until_position ? std::optional{read_until_position - 1} : std::nullopt}; return Range{ .left = static_cast<size_t>(offset), .right = read_until_position ? std::optional{read_until_position - 1} : std::nullopt };
} }
std::unique_ptr<ReadBuffer> ReadBufferFromS3::initialize() std::unique_ptr<ReadBuffer> ReadBufferFromS3::initialize()

View File

@ -33,8 +33,11 @@ private:
String key; String key;
UInt64 max_single_read_retries; UInt64 max_single_read_retries;
off_t offset = 0; /// These variables are atomic because they can be used for `logging only`
off_t read_until_position = 0; /// (where it is not important to get consistent result)
/// from separate thread other than the one which uses the buffer for s3 reading.
std::atomic<off_t> offset = 0;
std::atomic<off_t> read_until_position = 0;
Aws::S3::Model::GetObjectResult read_result; Aws::S3::Model::GetObjectResult read_result;
std::unique_ptr<ReadBuffer> impl; std::unique_ptr<ReadBuffer> impl;

View File

@ -1527,6 +1527,21 @@ ActionsDAG::SplitResult ActionsDAG::splitActionsBeforeArrayJoin(const NameSet &
return res; return res;
} }
/// Splits this DAG at the nodes that produce the sorting columns.
/// Every name in `sort_columns` must be resolvable through tryFindInIndex; otherwise a
/// LOGICAL_ERROR exception carrying a dump of the DAG is thrown.
/// The second part of the result takes over this DAG's `project_input` flag.
ActionsDAG::SplitResult ActionsDAG::splitActionsBySortingDescription(const NameSet & sort_columns) const
{
    std::unordered_set<const Node *> split_nodes;
    for (const auto & sort_column : sort_columns)
    {
        const auto * node = tryFindInIndex(sort_column);
        if (!node)
            throw Exception(
                ErrorCodes::LOGICAL_ERROR, "Sorting column {} wasn't found in the ActionsDAG's index. DAG:\n{}", sort_column, dumpDAG());

        split_nodes.insert(node);
    }

    auto split_result = split(split_nodes);
    split_result.second->project_input = project_input;
    return split_result;
}
ActionsDAG::SplitResult ActionsDAG::splitActionsForFilter(const std::string & column_name) const ActionsDAG::SplitResult ActionsDAG::splitActionsForFilter(const std::string & column_name) const
{ {
const auto * node = tryFindInIndex(column_name); const auto * node = tryFindInIndex(column_name);

View File

@ -274,6 +274,10 @@ public:
/// Index of initial actions must contain column_name. /// Index of initial actions must contain column_name.
SplitResult splitActionsForFilter(const std::string & column_name) const; SplitResult splitActionsForFilter(const std::string & column_name) const;
/// Splits actions into two parts. The first part contains all the calculations required to calculate sort_columns.
/// The second contains the rest.
/// Index of initial actions must contain every name from sort_columns; a LOGICAL_ERROR exception is thrown otherwise.
SplitResult splitActionsBySortingDescription(const NameSet & sort_columns) const;
/// Create actions which may calculate part of filter using only available_inputs. /// Create actions which may calculate part of filter using only available_inputs.
/// If nothing may be calculated, returns nullptr. /// If nothing may be calculated, returns nullptr.
/// Otherwise, return actions which inputs are from available_inputs. /// Otherwise, return actions which inputs are from available_inputs.

View File

@ -136,7 +136,7 @@ std::string ExternalDictionariesLoader::resolveDictionaryNameFromDatabaseCatalog
if (qualified_name->database.empty()) if (qualified_name->database.empty())
{ {
/// Ether database name is not specified and we should use current one /// Either database name is not specified and we should use current one
/// or it's an XML dictionary. /// or it's an XML dictionary.
bool is_xml_dictionary = has(name); bool is_xml_dictionary = has(name);
if (is_xml_dictionary) if (is_xml_dictionary)

View File

@ -15,6 +15,7 @@ public:
size_t fields_count = 0; size_t fields_count = 0;
String id; String id;
bool all = false;
String getID(char) const override; String getID(char) const override;
ASTPtr clone() const override; ASTPtr clone() const override;

View File

@ -5,6 +5,7 @@
#include <Parsers/ASTPartition.h> #include <Parsers/ASTPartition.h>
#include <Parsers/ASTLiteral.h> #include <Parsers/ASTLiteral.h>
#include <Parsers/ASTFunction.h> #include <Parsers/ASTFunction.h>
#include <Parsers/ASTIdentifier.h>
#include <Common/typeid_cast.h> #include <Common/typeid_cast.h>
namespace DB namespace DB
@ -13,6 +14,7 @@ namespace DB
bool ParserPartition::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) bool ParserPartition::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
{ {
ParserKeyword s_id("ID"); ParserKeyword s_id("ID");
ParserKeyword s_all("ALL");
ParserStringLiteral parser_string_literal; ParserStringLiteral parser_string_literal;
ParserExpression parser_expr; ParserExpression parser_expr;
@ -28,6 +30,14 @@ bool ParserPartition::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
partition->id = partition_id->as<ASTLiteral &>().value.get<String>(); partition->id = partition_id->as<ASTLiteral &>().value.get<String>();
} }
else if (s_all.ignore(pos, expected))
{
ASTPtr value = makeASTFunction("tuple");
partition->value = value;
partition->children.push_back(value);
partition->fields_count = 0;
partition->all = true;
}
else else
{ {
ASTPtr value; ASTPtr value;

View File

@ -44,16 +44,20 @@ size_t tryMergeExpressions(QueryPlan::Node * parent_node, QueryPlan::Nodes &);
/// May split FilterStep and push down only part of it. /// May split FilterStep and push down only part of it.
size_t tryPushDownFilter(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes); size_t tryPushDownFilter(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes);
/// Move ExpressionStep after SortingStep if possible.
/// May split ExpressionStep and lift up only a part of it:
/// calculations that are required to produce the sorting columns stay below the sorting,
/// the remaining ones are executed after it.
/// Returns 0 when the plan was left unchanged and a non-zero value when it was rewritten.
size_t tryExecuteFunctionsAfterSorting(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes);
inline const auto & getOptimizations() inline const auto & getOptimizations()
{ {
static const std::array<Optimization, 5> optimizations = static const std::array<Optimization, 6> optimizations = {{
{{
{tryLiftUpArrayJoin, "liftUpArrayJoin", &QueryPlanOptimizationSettings::optimize_plan}, {tryLiftUpArrayJoin, "liftUpArrayJoin", &QueryPlanOptimizationSettings::optimize_plan},
{tryPushDownLimit, "pushDownLimit", &QueryPlanOptimizationSettings::optimize_plan}, {tryPushDownLimit, "pushDownLimit", &QueryPlanOptimizationSettings::optimize_plan},
{trySplitFilter, "splitFilter", &QueryPlanOptimizationSettings::optimize_plan}, {trySplitFilter, "splitFilter", &QueryPlanOptimizationSettings::optimize_plan},
{tryMergeExpressions, "mergeExpressions", &QueryPlanOptimizationSettings::optimize_plan}, {tryMergeExpressions, "mergeExpressions", &QueryPlanOptimizationSettings::optimize_plan},
{tryPushDownFilter, "pushDownFilter", &QueryPlanOptimizationSettings::filter_push_down}, {tryPushDownFilter, "pushDownFilter", &QueryPlanOptimizationSettings::filter_push_down},
}}; {tryExecuteFunctionsAfterSorting, "liftUpFunctions", &QueryPlanOptimizationSettings::optimize_plan},
}};
return optimizations; return optimizations;
} }

View File

@ -0,0 +1,77 @@
#include <Interpreters/ActionsDAG.h>
#include <Processors/QueryPlan/ExpressionStep.h>
#include <Processors/QueryPlan/Optimizations/Optimizations.h>
#include <Processors/QueryPlan/SortingStep.h>
#include <Common/Exception.h>
namespace DB
{
namespace ErrorCodes
{
/// Declared here, defined in another translation unit; used below when a plan node
/// does not have exactly one child.
extern const int LOGICAL_ERROR;
}
}
namespace
{

/// Returns the output stream of the single child of `node`.
/// Throws LOGICAL_ERROR when the node does not have exactly one child.
const DB::DataStream & getChildOutputStream(DB::QueryPlan::Node & node)
{
    const auto & children = node.children;
    if (children.size() == 1)
        return children.front()->step->getOutputStream();

    throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Node \"{}\" is expected to have only one child.", node.step->getName());
}

}
namespace DB::QueryPlanOptimizations
{

/// Rewrites `Sorting -> Expression` so that only the calculations required to produce the
/// sorting columns stay below the sorting; the remaining ones are moved above it.
///
/// `parent_node` must hold a SortingStep whose only child holds an ExpressionStep, otherwise
/// nothing is changed and 0 is returned. 0 is also returned when the part of the expression
/// that is not needed for sorting is trivial. On success one node is appended to `nodes`
/// and 3 is returned.
size_t tryExecuteFunctionsAfterSorting(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes)
{
    if (parent_node->children.size() != 1)
        return 0;

    QueryPlan::Node * child_node = parent_node->children.front();

    auto & upper_step = parent_node->step;
    auto & lower_step = child_node->step;

    auto * sorting_step = typeid_cast<SortingStep *>(upper_step.get());
    auto * expression_step = typeid_cast<ExpressionStep *>(lower_step.get());
    if (!(sorting_step && expression_step))
        return 0;

    NameSet sorting_column_names;
    for (const auto & sort_column : sorting_step->getSortDescription())
        sorting_column_names.insert(sort_column.column_name);

    auto [pre_sort_actions, post_sort_actions]
        = expression_step->getExpression()->splitActionsBySortingDescription(sorting_column_names);

    /// Everything is needed for sorting, so there is nothing to postpone.
    if (post_sort_actions->trivial())
        return 0;

    /// Current shape: Sorting (parent_node) -> Expression (child_node)
    auto & pre_sort_node = nodes.emplace_back();
    std::swap(pre_sort_node.children, child_node->children);
    child_node->children = {&pre_sort_node};

    pre_sort_node.step = std::make_unique<ExpressionStep>(getChildOutputStream(pre_sort_node), std::move(pre_sort_actions));
    pre_sort_node.step->setStepDescription(lower_step->getStepDescription());
    /// Now: Sorting (parent_node) -> original Expression (child_node) -> NeededCalculations (pre_sort_node)

    std::swap(upper_step, lower_step);
    /// Now: original Expression (parent_node) -> Sorting (child_node) -> NeededCalculations (pre_sort_node)

    /// The sorting now reads from the new node; its output header becomes its input header.
    sorting_step->updateInputStream(getChildOutputStream(*child_node));
    auto sorting_input_header = sorting_step->getInputStreams().at(0).header;
    sorting_step->updateOutputStream(std::move(sorting_input_header));

    /// Replace the original expression with the part that can run after sorting.
    auto original_description = upper_step->getStepDescription();
    upper_step = std::make_unique<ExpressionStep>(lower_step->getOutputStream(), std::move(post_sort_actions));
    upper_step->setStepDescription(original_description + " [lifted up part]");
    /// Final: UnneededCalculations (parent_node) -> Sorting (child_node) -> NeededCalculations (pre_sort_node)

    return 3;
}

}

View File

@ -1,11 +1,12 @@
#include <stdexcept>
#include <IO/Operators.h>
#include <Processors/Merges/MergingSortedTransform.h>
#include <Processors/QueryPlan/SortingStep.h> #include <Processors/QueryPlan/SortingStep.h>
#include <QueryPipeline/QueryPipelineBuilder.h> #include <Processors/Transforms/FinishSortingTransform.h>
#include <Processors/Transforms/LimitsCheckingTransform.h>
#include <Processors/Transforms/MergeSortingTransform.h> #include <Processors/Transforms/MergeSortingTransform.h>
#include <Processors/Transforms/PartialSortingTransform.h> #include <Processors/Transforms/PartialSortingTransform.h>
#include <Processors/Transforms/FinishSortingTransform.h> #include <QueryPipeline/QueryPipelineBuilder.h>
#include <Processors/Merges/MergingSortedTransform.h>
#include <Processors/Transforms/LimitsCheckingTransform.h>
#include <IO/Operators.h>
#include <Common/JSONBuilder.h> #include <Common/JSONBuilder.h>
namespace DB namespace DB
@ -88,6 +89,18 @@ SortingStep::SortingStep(
output_stream->sort_mode = DataStream::SortMode::Stream; output_stream->sort_mode = DataStream::SortMode::Stream;
} }
/// Drops all current input streams of the step and makes `input_stream` the only one.
void SortingStep::updateInputStream(DataStream input_stream)
{
    input_streams.clear();
    input_streams.push_back(std::move(input_stream));
}
/// Recreates the output stream from the first input stream, the given header and the step's
/// data stream traits, then passes the new header and distinct columns to updateDistinctColumns.
/// Expects at least one input stream to be present (`at(0)` throws otherwise).
void SortingStep::updateOutputStream(Block result_header)
{
    const auto & source_stream = input_streams.at(0);
    output_stream = createOutputStream(source_stream, std::move(result_header), getDataStreamTraits());

    auto & rebuilt_stream = *output_stream;
    updateDistinctColumns(rebuilt_stream.header, rebuilt_stream.distinct_columns);
}
void SortingStep::updateLimit(size_t limit_) void SortingStep::updateLimit(size_t limit_)
{ {
if (limit_ && (limit == 0 || limit_ < limit)) if (limit_ && (limit == 0 || limit_ < limit))

View File

@ -49,6 +49,11 @@ public:
/// Add limit or change it to lower value. /// Add limit or change it to lower value.
void updateLimit(size_t limit_); void updateLimit(size_t limit_);
/// Replace all input streams of the step with the single given stream.
void updateInputStream(DataStream input_stream);
/// Rebuild the output stream from the first input stream and the given header.
void updateOutputStream(Block result_header);
/// Sort description stored in `result_description`.
/// Returned by const reference to avoid copying the whole description on every call;
/// the reference stays valid as long as this step is alive and the member is not reassigned.
const SortDescription & getSortDescription() const { return result_description; }
private: private:
enum class Type enum class Type

View File

@ -3110,7 +3110,8 @@ void MergeTreeData::tryRemovePartImmediately(DataPartPtr && part)
{ {
auto lock = lockParts(); auto lock = lockParts();
LOG_TRACE(log, "Trying to immediately remove part {}", part->getNameWithState()); auto part_name_with_state = part->getNameWithState();
LOG_TRACE(log, "Trying to immediately remove part {}", part_name_with_state);
if (part->getState() != DataPartState::Temporary) if (part->getState() != DataPartState::Temporary)
{ {
@ -3121,7 +3122,16 @@ void MergeTreeData::tryRemovePartImmediately(DataPartPtr && part)
part.reset(); part.reset();
if (!((*it)->getState() == DataPartState::Outdated && it->unique())) if (!((*it)->getState() == DataPartState::Outdated && it->unique()))
{
if ((*it)->getState() != DataPartState::Outdated)
LOG_WARNING(log, "Cannot immediately remove part {} because it's not in Outdated state "
"usage counter {}", part_name_with_state, it->use_count());
if (!it->unique())
LOG_WARNING(log, "Cannot immediately remove part {} because someone using it right now "
"usage counter {}", part_name_with_state, it->use_count());
return; return;
}
modifyPartState(it, DataPartState::Deleting); modifyPartState(it, DataPartState::Deleting);
@ -3566,7 +3576,12 @@ void MergeTreeData::checkAlterPartitionIsPossible(
void MergeTreeData::checkPartitionCanBeDropped(const ASTPtr & partition, ContextPtr local_context) void MergeTreeData::checkPartitionCanBeDropped(const ASTPtr & partition, ContextPtr local_context)
{ {
const String partition_id = getPartitionIDFromQuery(partition, local_context); const String partition_id = getPartitionIDFromQuery(partition, local_context);
auto parts_to_remove = getVisibleDataPartsVectorInPartition(local_context, partition_id); DataPartsVector parts_to_remove;
const auto * partition_ast = partition->as<ASTPartition>();
if (partition_ast && partition_ast->all)
parts_to_remove = getVisibleDataPartsVector(local_context);
else
parts_to_remove = getVisibleDataPartsVectorInPartition(local_context, partition_id);
UInt64 partition_size = 0; UInt64 partition_size = 0;
@ -4020,6 +4035,8 @@ String MergeTreeData::getPartitionIDFromQuery(const ASTPtr & ast, ContextPtr loc
auto metadata_snapshot = getInMemoryMetadataPtr(); auto metadata_snapshot = getInMemoryMetadataPtr();
const Block & key_sample_block = metadata_snapshot->getPartitionKey().sample_block; const Block & key_sample_block = metadata_snapshot->getPartitionKey().sample_block;
if (partition_ast.all)
return "ALL";
size_t fields_count = key_sample_block.columns(); size_t fields_count = key_sample_block.columns();
if (partition_ast.fields_count != fields_count) if (partition_ast.fields_count != fields_count)
throw Exception(ErrorCodes::INVALID_PARTITION_VALUE, throw Exception(ErrorCodes::INVALID_PARTITION_VALUE,

View File

@ -1474,7 +1474,11 @@ void StorageMergeTree::dropPartition(const ASTPtr & partition, bool detach, Cont
/// This protects against "revival" of data for a removed partition after completion of merge. /// This protects against "revival" of data for a removed partition after completion of merge.
auto merge_blocker = stopMergesAndWait(); auto merge_blocker = stopMergesAndWait();
String partition_id = getPartitionIDFromQuery(partition, local_context); String partition_id = getPartitionIDFromQuery(partition, local_context);
parts_to_remove = getVisibleDataPartsVectorInPartition(local_context, partition_id); const auto * partition_ast = partition->as<ASTPartition>();
if (partition_ast && partition_ast->all)
parts_to_remove = getVisibleDataPartsVector(local_context);
else
parts_to_remove = getVisibleDataPartsVectorInPartition(local_context, partition_id);
/// TODO should we throw an exception if parts_to_remove is empty? /// TODO should we throw an exception if parts_to_remove is empty?
removePartsFromWorkingSet(local_context->getCurrentTransaction().get(), parts_to_remove, true); removePartsFromWorkingSet(local_context->getCurrentTransaction().get(), parts_to_remove, true);

View File

@ -43,6 +43,7 @@
#include <Parsers/ASTDropQuery.h> #include <Parsers/ASTDropQuery.h>
#include <Parsers/ASTFunction.h> #include <Parsers/ASTFunction.h>
#include <Parsers/ASTOptimizeQuery.h> #include <Parsers/ASTOptimizeQuery.h>
#include <Parsers/ASTPartition.h>
#include <Parsers/ASTLiteral.h> #include <Parsers/ASTLiteral.h>
#include <Parsers/queryToString.h> #include <Parsers/queryToString.h>
#include <Parsers/ASTCheckQuery.h> #include <Parsers/ASTCheckQuery.h>
@ -3322,7 +3323,7 @@ void StorageReplicatedMergeTree::removePartAndEnqueueFetch(const String & part_n
if (!broken_part_info.contains(part->info)) if (!broken_part_info.contains(part->info))
continue; continue;
/// Broken part itself ether already moved to detached or does not exist. /// Broken part itself either already moved to detached or does not exist.
assert(broken_part_info != part->info); assert(broken_part_info != part->info);
part->makeCloneInDetached("covered-by-broken", getInMemoryMetadataPtr()); part->makeCloneInDetached("covered-by-broken", getInMemoryMetadataPtr());
} }
@ -4943,15 +4944,37 @@ void StorageReplicatedMergeTree::dropPartition(const ASTPtr & partition, bool de
throw Exception("DROP PARTITION cannot be done on this replica because it is not a leader", ErrorCodes::NOT_A_LEADER); throw Exception("DROP PARTITION cannot be done on this replica because it is not a leader", ErrorCodes::NOT_A_LEADER);
zkutil::ZooKeeperPtr zookeeper = getZooKeeperAndAssertNotReadonly(); zkutil::ZooKeeperPtr zookeeper = getZooKeeperAndAssertNotReadonly();
LogEntry entry;
String partition_id = getPartitionIDFromQuery(partition, query_context); const auto * partition_ast = partition->as<ASTPartition>();
bool did_drop = dropAllPartsInPartition(*zookeeper, partition_id, entry, query_context, detach); if (partition_ast && partition_ast->all)
if (did_drop)
{ {
waitForLogEntryToBeProcessedIfNecessary(entry, query_context); Strings partitions = zookeeper->getChildren(fs::path(zookeeper_path) / "block_numbers");
cleanLastPartNode(partition_id);
std::vector<std::pair<String, std::unique_ptr<LogEntry>>> entries_with_partitionid_to_drop;
entries_with_partitionid_to_drop.reserve(partitions.size());
for (String & partition_id : partitions)
{
auto entry = std::make_unique<LogEntry>();
if (dropAllPartsInPartition(*zookeeper, partition_id, *entry, query_context, detach))
entries_with_partitionid_to_drop.emplace_back(partition_id, std::move(entry));
}
for (const auto & entry : entries_with_partitionid_to_drop)
{
waitForLogEntryToBeProcessedIfNecessary(*entry.second, query_context);
cleanLastPartNode(entry.first);
}
}
else
{
LogEntry entry;
String partition_id = getPartitionIDFromQuery(partition, query_context);
bool did_drop = dropAllPartsInPartition(*zookeeper, partition_id, entry, query_context, detach);
if (did_drop)
{
waitForLogEntryToBeProcessedIfNecessary(entry, query_context);
cleanLastPartNode(partition_id);
}
} }
} }

View File

@ -162,7 +162,7 @@ def check_pr_description(pr_info):
i = 0 i = 0
while i < len(lines): while i < len(lines):
if re.match(r"(?i)^[>*_ ]*change\s*log\s*category", lines[i]): if re.match(r"(?i)^[#>*_ ]*change\s*log\s*category", lines[i]):
i += 1 i += 1
if i >= len(lines): if i >= len(lines):
break break
@ -191,7 +191,7 @@ def check_pr_description(pr_info):
return result_status[:140], category return result_status[:140], category
elif re.match( elif re.match(
r"(?i)^[>*_ ]*(short\s*description|change\s*log\s*entry)", lines[i] r"(?i)^[#>*_ ]*(short\s*description|change\s*log\s*entry)", lines[i]
): ):
i += 1 i += 1
# Can have one empty line between header and the entry itself. # Can have one empty line between header and the entry itself.
@ -262,9 +262,14 @@ if __name__ == "__main__":
remove_labels(gh, pr_info, pr_labels_to_remove) remove_labels(gh, pr_info, pr_labels_to_remove)
if description_report: if description_report:
print("::notice ::Cannot run, description does not match the template") print(
"::error ::Cannot run, PR description does not match the template: "
f"{description_report}"
)
logging.info( logging.info(
"PR body doesn't match the template: (start)\n%s\n(end)", pr_info.body "PR body doesn't match the template: (start)\n%s\n(end)\n" "Reason: %s",
pr_info.body,
description_report,
) )
url = ( url = (
f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/" f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/"

View File

@ -0,0 +1,4 @@
<test>
<!-- Both queries select sipHash64(number), which is not part of the ORDER BY expression, and keep only 5 rows. -->
<!-- The first query sorts by a plain column, the second one by an expression (number + 1). -->
<query>SELECT sipHash64(number) FROM numbers(1e8) ORDER BY number LIMIT 5</query>
<query>SELECT sipHash64(number) FROM numbers(1e8) ORDER BY number + 1 LIMIT 5</query>
</test>

View File

@ -0,0 +1,53 @@
<test>
<!-- Performance test for hasAll(set, subset) on MergeTree tables with two array columns. -->
<!-- Every query is instantiated once per {array_type} value listed in the substitution below. -->
<substitutions>
<substitution>
<name>array_type</name>
<values>
<value>Int8</value>
<value>Int16</value>
<value>Int32</value>
<value>Int64</value>
</values>
</substitution>
</substitutions>
<!-- Three tables per element type: small, medium and large; they differ only in how they are filled. -->
<create_query>
CREATE TABLE test_table_small_{array_type}
(
`set` Array({array_type}),
`subset` Array ({array_type})
)
ENGINE = MergeTree ORDER BY set;
</create_query>
<create_query>
CREATE TABLE test_table_medium_{array_type}
(
`set` Array({array_type}),
`subset` Array ({array_type})
)
ENGINE = MergeTree ORDER BY set;
</create_query>
<create_query>
CREATE TABLE test_table_large_{array_type}
(
`set` Array({array_type}),
`subset` Array ({array_type})
)
ENGINE = MergeTree ORDER BY set;
</create_query>
<!-- Arrays are sampled with groupArraySample from rand64() values; the sample sizes grow from small to large. -->
<fill_query>INSERT INTO test_table_small_{array_type} SELECT groupArraySample(5000)(rand64()) AS set, groupArraySample(500)(rand64()) AS subset FROM numbers(10000000) GROUP BY number % 5000;</fill_query>
<fill_query>INSERT INTO test_table_medium_{array_type} SELECT groupArraySample(50000)(rand64()) AS set, groupArraySample(5000)(rand64()) AS subset FROM numbers(25000000) GROUP BY number % 50000;</fill_query>
<fill_query>INSERT INTO test_table_large_{array_type} SELECT groupArraySample(500000)(rand64()) AS set, groupArraySample(500000)(rand64()) AS subset FROM numbers(50000000) GROUP BY number % 500000;</fill_query>
<!-- Measured queries; FORMAT Null discards the result rows. -->
<query>SELECT hasAll(set, subset) FROM test_table_small_{array_type} FORMAT Null</query>
<query>SELECT hasAll(set, subset) FROM test_table_medium_{array_type} FORMAT Null</query>
<query>SELECT hasAll(set, subset) FROM test_table_large_{array_type} FORMAT Null</query>
<drop_query>DROP TABLE IF EXISTS test_table_small_{array_type}</drop_query>
<drop_query>DROP TABLE IF EXISTS test_table_medium_{array_type}</drop_query>
<drop_query>DROP TABLE IF EXISTS test_table_large_{array_type}</drop_query>
</test>

View File

@ -10,3 +10,15 @@
5 2 5 2
6 3 6 3
7 3 7 3
4 2
5 2
1 1
2 1
3 1
1 1
2 1
3 1
1 1
2 2
1 1
1 1

View File

@ -19,4 +19,53 @@ INSERT INTO alter_attach VALUES (6, 3), (7, 3);
ALTER TABLE alter_attach ATTACH PARTITION 2; ALTER TABLE alter_attach ATTACH PARTITION 2;
SELECT * FROM alter_attach ORDER BY x; SELECT * FROM alter_attach ORDER BY x;
ALTER TABLE alter_attach DETACH PARTITION ALL;
SELECT * FROM alter_attach ORDER BY x;
ALTER TABLE alter_attach ATTACH PARTITION 2;
SELECT * FROM alter_attach ORDER BY x;
DROP TABLE IF EXISTS detach_all_no_partition;
CREATE TABLE detach_all_no_partition (x UInt64, p UInt8) ENGINE = MergeTree ORDER BY tuple();
INSERT INTO detach_all_no_partition VALUES (1, 1), (2, 1), (3, 1);
SELECT * FROM detach_all_no_partition ORDER BY x;
ALTER TABLE detach_all_no_partition DETACH PARTITION ALL;
SELECT * FROM detach_all_no_partition ORDER BY x;
ALTER TABLE detach_all_no_partition ATTACH PARTITION tuple();
SELECT * FROM detach_all_no_partition ORDER BY x;
DROP TABLE alter_attach; DROP TABLE alter_attach;
DROP TABLE detach_all_no_partition;
DROP TABLE IF EXISTS replicated_table_detach_all1;
DROP TABLE IF EXISTS replicated_table_detach_all2;
CREATE TABLE replicated_table_detach_all1 (
id UInt64,
Data String
) ENGINE = ReplicatedMergeTree('/clickhouse/tables/test_00753_{database}/replicated_table_detach_all', '1') ORDER BY id PARTITION BY id;
CREATE TABLE replicated_table_detach_all2 (
id UInt64,
Data String
) ENGINE = ReplicatedMergeTree('/clickhouse/tables/test_00753_{database}/replicated_table_detach_all', '2') ORDER BY id PARTITION BY id;
INSERT INTO replicated_table_detach_all1 VALUES (1, '1'), (2, '2');
select * from replicated_table_detach_all1 order by id;
ALTER TABLE replicated_table_detach_all1 DETACH PARTITION ALL;
select * from replicated_table_detach_all1 order by id;
SYSTEM SYNC REPLICA replicated_table_detach_all2;
select * from replicated_table_detach_all2 order by id;
ALTER TABLE replicated_table_detach_all1 ATTACH PARTITION tuple(1);
select * from replicated_table_detach_all1 order by id;
SYSTEM SYNC REPLICA replicated_table_detach_all2;
select * from replicated_table_detach_all2 order by id;
DROP TABLE replicated_table_detach_all1;
DROP TABLE replicated_table_detach_all2;

View File

@ -1,3 +1,4 @@
1000 1000
0 0
1000 1000
0

View File

@ -21,4 +21,8 @@ ALTER TABLE table_01 ATTACH PART '20191001_1_1_0';
SELECT COUNT() FROM table_01; SELECT COUNT() FROM table_01;
ALTER TABLE table_01 DETACH PARTITION ALL;
SELECT COUNT() FROM table_01;
DROP TABLE IF EXISTS table_01; DROP TABLE IF EXISTS table_01;

View File

@ -35,10 +35,11 @@ Expression (Projection)
ReadFromMergeTree (default.test_table) ReadFromMergeTree (default.test_table)
Expression (Projection) Expression (Projection)
Limit (preliminary LIMIT (without OFFSET)) Limit (preliminary LIMIT (without OFFSET))
Sorting Expression (Before ORDER BY [lifted up part])
Expression (Before ORDER BY) Sorting
SettingQuotaAndLimits (Set limits and quota after reading from storage) Expression (Before ORDER BY)
ReadFromMergeTree (default.test_table) SettingQuotaAndLimits (Set limits and quota after reading from storage)
ReadFromMergeTree (default.test_table)
optimize_aggregation_in_order optimize_aggregation_in_order
Expression ((Projection + Before ORDER BY)) Expression ((Projection + Before ORDER BY))
Aggregating Aggregating

View File

@ -925,10 +925,11 @@ Expression ((Projection + Before ORDER BY))
Window (Window step for window \'ORDER BY o ASC, number ASC\') Window (Window step for window \'ORDER BY o ASC, number ASC\')
Sorting (Sorting for window \'ORDER BY o ASC, number ASC\') Sorting (Sorting for window \'ORDER BY o ASC, number ASC\')
Window (Window step for window \'ORDER BY number ASC\') Window (Window step for window \'ORDER BY number ASC\')
Sorting (Sorting for window \'ORDER BY number ASC\') Expression ((Before window functions + (Projection + Before ORDER BY)) [lifted up part])
Expression ((Before window functions + (Projection + Before ORDER BY))) Sorting (Sorting for window \'ORDER BY number ASC\')
SettingQuotaAndLimits (Set limits and quota after reading from storage) Expression ((Before window functions + (Projection + Before ORDER BY)))
ReadFromStorage (SystemNumbers) SettingQuotaAndLimits (Set limits and quota after reading from storage)
ReadFromStorage (SystemNumbers)
-- A test case for the sort comparator found by fuzzer. -- A test case for the sort comparator found by fuzzer.
SELECT SELECT
max(number) OVER (ORDER BY number DESC NULLS FIRST), max(number) OVER (ORDER BY number DESC NULLS FIRST),

View File

@ -10,8 +10,8 @@ set max_block_size=40960;
-- MergeSortingTransform: Re-merging intermediate ORDER BY data (20 blocks with 819200 rows) to save memory consumption -- MergeSortingTransform: Re-merging intermediate ORDER BY data (20 blocks with 819200 rows) to save memory consumption
-- MergeSortingTransform: Memory usage is lowered from 186.25 MiB to 95.00 MiB -- MergeSortingTransform: Memory usage is lowered from 186.25 MiB to 95.00 MiB
-- MergeSortingTransform: Re-merging is not useful (memory usage was not lowered by remerge_sort_lowered_memory_bytes_ratio=2.0) -- MergeSortingTransform: Re-merging is not useful (memory usage was not lowered by remerge_sort_lowered_memory_bytes_ratio=2.0)
select number k, repeat(toString(number), 11) v1, repeat(toString(number), 12) v2 from numbers(3e6) order by k limit 400e3 format Null; -- { serverError 241 } select number k, repeat(toString(number), 11) v1, repeat(toString(number), 12) v2 from numbers(3e6) order by v1, v2 limit 400e3 format Null; -- { serverError 241 }
select number k, repeat(toString(number), 11) v1, repeat(toString(number), 12) v2 from numbers(3e6) order by k limit 400e3 settings remerge_sort_lowered_memory_bytes_ratio=2. format Null; -- { serverError 241 } select number k, repeat(toString(number), 11) v1, repeat(toString(number), 12) v2 from numbers(3e6) order by v1, v2 limit 400e3 settings remerge_sort_lowered_memory_bytes_ratio=2. format Null; -- { serverError 241 }
-- remerge_sort_lowered_memory_bytes_ratio 1.9 is good (need at least 1.91/0.98=1.94) -- remerge_sort_lowered_memory_bytes_ratio 1.9 is good (need at least 1.91/0.98=1.94)
-- MergeSortingTransform: Re-merging intermediate ORDER BY data (20 blocks with 819200 rows) to save memory consumption -- MergeSortingTransform: Re-merging intermediate ORDER BY data (20 blocks with 819200 rows) to save memory consumption

View File

@ -142,3 +142,12 @@ Filter
Filter Filter
2 3 2 3
2 3 2 3
> function calculation should be done after sorting and limit (if possible)
> Expression should be divided into two subexpressions and only one of them should be moved after Sorting
Expression (Before ORDER BY [lifted up part])
FUNCTION sipHash64
Sorting
Expression (Before ORDER BY)
FUNCTION plus
> this query should be executed without throwing an exception
0

View File

@ -196,3 +196,12 @@ $CLICKHOUSE_CLIENT -q "
select a, b from ( select a, b from (
select number + 1 as a, number + 2 as b from numbers(2) union all select number + 1 as b, number + 2 as a from numbers(2) select number + 1 as a, number + 2 as b from numbers(2) union all select number + 1 as b, number + 2 as a from numbers(2)
) where a != 1 settings enable_optimize_predicate_expression = 0" ) where a != 1 settings enable_optimize_predicate_expression = 0"
echo "> function calculation should be done after sorting and limit (if possible)"
echo "> Expression should be divided into two subexpressions and only one of them should be moved after Sorting"
$CLICKHOUSE_CLIENT -q "
explain actions = 1 select number as n, sipHash64(n) from numbers(100) order by number + 1 limit 5" |
sed 's/^ *//g' | grep -o "^ *\(Expression (Before ORDER BY.*)\|Sorting\|FUNCTION \w\+\)"
echo "> this query should be executed without throwing an exception"
$CLICKHOUSE_CLIENT -q "
select throwIf(number = 5) from (select * from numbers(10)) order by number limit 1"

View File

@ -7,13 +7,15 @@
ExpressionTransform ExpressionTransform
(Limit) (Limit)
Limit Limit
(Sorting) (Expression)
MergingSortedTransform 2 → 1 ExpressionTransform
(Expression) (Sorting)
ExpressionTransform × 2 MergingSortedTransform 2 → 1
(SettingQuotaAndLimits) (Expression)
(ReadFromMergeTree) ExpressionTransform × 2
MergeTreeInOrder × 2 0 → 1 (SettingQuotaAndLimits)
(ReadFromMergeTree)
MergeTreeInOrder × 2 0 → 1
2020-10-01 9 2020-10-01 9
2020-10-01 9 2020-10-01 9
2020-10-01 9 2020-10-01 9
@ -23,16 +25,18 @@ ExpressionTransform
ExpressionTransform ExpressionTransform
(Limit) (Limit)
Limit Limit
(Sorting) (Expression)
MergingSortedTransform 2 → 1 ExpressionTransform
(Expression) (Sorting)
ExpressionTransform × 2 MergingSortedTransform 2 → 1
(SettingQuotaAndLimits) (Expression)
(ReadFromMergeTree) ExpressionTransform × 2
ReverseTransform (SettingQuotaAndLimits)
MergeTreeReverse 0 → 1 (ReadFromMergeTree)
ReverseTransform ReverseTransform
MergeTreeReverse 0 → 1 MergeTreeReverse 0 → 1
ReverseTransform
MergeTreeReverse 0 → 1
2020-10-01 9 2020-10-01 9
2020-10-01 9 2020-10-01 9
2020-10-01 9 2020-10-01 9
@ -42,15 +46,17 @@ ExpressionTransform
ExpressionTransform ExpressionTransform
(Limit) (Limit)
Limit Limit
(Sorting) (Expression)
FinishSortingTransform ExpressionTransform
PartialSortingTransform (Sorting)
MergingSortedTransform 2 → 1 FinishSortingTransform
(Expression) PartialSortingTransform
ExpressionTransform × 2 MergingSortedTransform 2 → 1
(SettingQuotaAndLimits) (Expression)
(ReadFromMergeTree) ExpressionTransform × 2
MergeTreeInOrder × 2 0 → 1 (SettingQuotaAndLimits)
(ReadFromMergeTree)
MergeTreeInOrder × 2 0 → 1
2020-10-11 0 2020-10-11 0
2020-10-11 0 2020-10-11 0
2020-10-11 0 2020-10-11 0