Merge pull request #13099 from ClickHouse/fixed-hash-map-implicit-zero-6

Slightly improve performance of aggregation by UInt8/UInt16 keys, part 6
This commit is contained in:
alexey-milovidov 2020-08-09 23:13:53 +03:00 committed by GitHub
commit 180ea39bf8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 785 additions and 94 deletions

View File

@ -17,6 +17,11 @@
#include <IO/WriteHelpers.h>
#if !__clang__
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Warray-bounds"
#endif
namespace DB
{
@ -250,3 +255,8 @@ public:
}
#if !__clang__
#pragma GCC diagnostic pop
#endif

View File

@ -78,7 +78,7 @@ public:
/// Get `sizeof` of structure with data.
virtual size_t sizeOfData() const = 0;
/// How the data structure should be aligned. NOTE: Currently not used (structures with aggregation state are put without alignment).
/// How the data structure should be aligned.
virtual size_t alignOfData() const = 0;
/** Adds a value into aggregation data on which place points to.
@ -222,24 +222,6 @@ public:
static_cast<const Derived *>(this)->add(places[i] + place_offset, columns, i, arena);
}
void addBatchLookupTable8(
size_t batch_size,
AggregateDataPtr * places,
size_t place_offset,
std::function<void(AggregateDataPtr &)> init,
const UInt8 * key,
const IColumn ** columns,
Arena * arena) const override
{
for (size_t i = 0; i < batch_size; ++i)
{
AggregateDataPtr & place = places[key[i]];
if (unlikely(!place))
init(place);
static_cast<const Derived *>(this)->add(place + place_offset, columns, i, arena);
}
}
void addBatchSinglePlace(size_t batch_size, AggregateDataPtr place, const IColumn ** columns, Arena * arena) const override
{
for (size_t i = 0; i < batch_size; ++i)
@ -274,6 +256,45 @@ public:
current_offset = next_offset;
}
}
/// Aggregate a batch whose keys are single bytes used as direct indices into `map`
/// (a 256-entry lookup table of aggregation states). Generic fallback implementation.
/// `init` lazily creates a state the first time a key is seen.
void addBatchLookupTable8(
    size_t batch_size,
    AggregateDataPtr * map,
    size_t place_offset,
    std::function<void(AggregateDataPtr &)> init,
    const UInt8 * key,
    const IColumn ** columns,
    Arena * arena) const override
{
    static constexpr size_t UNROLL_COUNT = 8;

    size_t i = 0;

    /// Resolve UNROLL_COUNT places first, then apply the values,
    /// so the table lookups and the additions can overlap.
    size_t batch_size_unrolled = batch_size / UNROLL_COUNT * UNROLL_COUNT;
    for (; i < batch_size_unrolled; i += UNROLL_COUNT)
    {
        AggregateDataPtr places[UNROLL_COUNT];
        for (size_t j = 0; j < UNROLL_COUNT; ++j)
        {
            AggregateDataPtr & place = map[key[i + j]];
            if (unlikely(!place))
                init(place);

            places[j] = place;
        }

        for (size_t j = 0; j < UNROLL_COUNT; ++j)
            static_cast<const Derived *>(this)->add(places[j] + place_offset, columns, i + j, arena);
    }

    /// Tail that does not fill a whole unroll group.
    for (; i < batch_size; ++i)
    {
        AggregateDataPtr & place = map[key[i]];
        if (unlikely(!place))
            init(place);
        static_cast<const Derived *>(this)->add(place + place_offset, columns, i, arena);
    }
}
};
@ -311,11 +332,85 @@ public:
return sizeof(Data);
}
/// Returns the alignment requirement of the aggregation state type.
/// NOTE: Currently not used (structures with aggregation state are put without alignment).
size_t alignOfData() const override
{
    return alignof(Data);
}
/// Specialized version for functions with a small fixed-size state:
/// aggregate into temporary stack/heap-local lookup tables first,
/// then merge them into the final `map`, avoiding the per-row indirection.
void addBatchLookupTable8(
    size_t batch_size,
    AggregateDataPtr * map,
    size_t place_offset,
    std::function<void(AggregateDataPtr &)> init,
    const UInt8 * key,
    const IColumn ** columns,
    Arena * arena) const override
{
    const Derived & func = *static_cast<const Derived *>(this);

    /// If the function is complex or too large, use more generic algorithm.
    /// (sizeOfData() != sizeof(Data) guards against functions whose real state
    /// differs from the static Data type.)
    if (func.allocatesMemoryInArena() || sizeof(Data) > 16 || func.sizeOfData() != sizeof(Data))
    {
        IAggregateFunctionHelper<Derived>::addBatchLookupTable8(batch_size, map, place_offset, init, key, columns, arena);
        return;
    }

    /// Will use UNROLL_COUNT number of lookup tables.
    static constexpr size_t UNROLL_COUNT = 4;

    /// Temporary states, one 256-entry table per unroll lane.
    /// NOTE(review): these Data objects are created with placement new below but their
    /// destructors are never invoked — presumably Data is trivially destructible given
    /// the guards above; confirm before reusing this pattern for other state types.
    std::unique_ptr<Data[]> places{new Data[256 * UNROLL_COUNT]};
    bool has_data[256 * UNROLL_COUNT]{}; /// Separate flags array to avoid heavy initialization.

    size_t i = 0;

    /// Aggregate data into different lookup tables.
    size_t batch_size_unrolled = batch_size / UNROLL_COUNT * UNROLL_COUNT;
    for (; i < batch_size_unrolled; i += UNROLL_COUNT)
    {
        for (size_t j = 0; j < UNROLL_COUNT; ++j)
        {
            size_t idx = j * 256 + key[i + j];
            if (unlikely(!has_data[idx]))
            {
                /// Construct the state lazily on first use of this (lane, key) slot.
                new (&places[idx]) Data;
                has_data[idx] = true;
            }
            func.add(reinterpret_cast<char *>(&places[idx]), columns, i + j, nullptr);
        }
    }

    /// Merge data from every lookup table to the final destination.
    for (size_t k = 0; k < 256; ++k)
    {
        for (size_t j = 0; j < UNROLL_COUNT; ++j)
        {
            if (has_data[j * 256 + k])
            {
                AggregateDataPtr & place = map[k];
                if (unlikely(!place))
                    init(place);

                func.merge(place + place_offset, reinterpret_cast<const char *>(&places[256 * j + k]), arena);
            }
        }
    }

    /// Process tails and add directly to the final destination.
    for (; i < batch_size; ++i)
    {
        size_t k = key[i];

        AggregateDataPtr & place = map[k];
        if (unlikely(!place))
            init(place);

        func.add(place + place_offset, columns, i, nullptr);
    }
}
};

View File

@ -79,3 +79,6 @@ target_link_libraries (memory_statistics_os_perf PRIVATE clickhouse_common_io)
add_executable (procfs_metrics_provider_perf procfs_metrics_provider_perf.cpp)
target_link_libraries (procfs_metrics_provider_perf PRIVATE clickhouse_common_io)
# Benchmark of different approaches to aggregation by UInt8 key (see average.cpp).
add_executable (average average.cpp)
target_link_libraries (average PRIVATE clickhouse_common_io)

View File

@ -0,0 +1,583 @@
#include <iostream>
#include <string>
#include <fmt/format.h>
#include <Core/Types.h>
#include <Common/PODArray.h>
#include <Common/HashTable/FixedHashMap.h>
#include <Common/Arena.h>
#include <Common/Stopwatch.h>
/** This test program evaluates different solutions for a simple degenerate task:
* Aggregate data by UInt8 key, calculate "avg" function on Float values.
*
* It tests the overhead of various data structures in comparison to the minimal code doing the same task.
* It also tests what it costs to access aggregation state via a single pointer indirection.
* Also it evaluates various ways to unroll the loop.
* And finally it compares with one solution involving bucket sort.
*
* How to use:
*
* for i in {1..10}; do src/Common/tests/average 100000000 1; done
*
* You will find the numbers for various options below.
*/
using namespace DB;
using Float = Float32;
struct State
{
Float sum = 0;
size_t count = 0;
void add(Float value)
{
sum += value;
++count;
}
template <size_t unroll_count = 128 / sizeof(Float)>
void addBatch(const Float * ptr, size_t size)
{
/// Compiler cannot unroll this loop, do it manually.
/// (at least for floats, most likely due to the lack of -fassociative-math)
Float partial_sums[unroll_count]{};
const auto * end = ptr + size;
const auto * unrolled_end = ptr + (size / unroll_count * unroll_count);
while (ptr < unrolled_end)
{
for (size_t i = 0; i < unroll_count; ++i)
partial_sums[i] += ptr[i];
ptr += unroll_count;
}
for (size_t i = 0; i < unroll_count; ++i)
sum += partial_sums[i];
while (ptr < end)
{
sum += *ptr;
++ptr;
}
count += size;
}
void merge(const State & other)
{
sum += other.sum;
count += other.count;
}
Float result() const
{
return sum / count;
}
bool operator!() const
{
return !count;
}
};
using StatePtr = State *;
/// Reference point: aggregation through a generic HashMap, with a pointer
/// indirection from each cell to an arena-allocated state.
Float NO_INLINE baseline_baseline(const PODArray<UInt8> & keys, const PODArray<Float> & values)
{
    Arena arena;
    HashMap<UInt8, StatePtr> map;

    size_t rows = keys.size();
    for (size_t row = 0; row != rows; ++row)
    {
        StatePtr & state = map[keys[row]];
        if (unlikely(!state))
            state = new (arena.alloc<State>()) State();

        state->add(values[row]);
    }

    return map[0] ? map[0]->result() : 0;
}
/// Aggregation through FixedHashMap (flat 256-cell table),
/// still with a pointer indirection to the arena-allocated state.
Float NO_INLINE baseline(const PODArray<UInt8> & keys, const PODArray<Float> & values)
{
    Arena arena;
    FixedHashMap<UInt8, StatePtr> map;

    const UInt8 * key = keys.data();
    const UInt8 * const key_end = key + keys.size();
    const Float * value = values.data();

    for (; key != key_end; ++key, ++value)
    {
        StatePtr & state = map[*key];
        if (unlikely(!state))
            state = new (arena.alloc<State>()) State();

        state->add(*value);
    }

    return map[0] ? map[0]->result() : 0;
}
/// Same as `baseline`, but with the implicit-zero map variant
/// (a zero-filled cell means "empty", so no separate occupancy flags).
Float NO_INLINE implicit_zero(const PODArray<UInt8> & keys, const PODArray<Float> & values)
{
    Arena arena;
    FixedImplicitZeroHashMap<UInt8, StatePtr> map;

    size_t rows = keys.size();
    for (size_t row = 0; row != rows; ++row)
    {
        StatePtr & state = map[keys[row]];
        if (unlikely(!state))
            state = new (arena.alloc<State>()) State();

        state->add(values[row]);
    }

    return map[0] ? map[0]->result() : 0;
}
/// FixedHashMap variant with FixedHashTableCalculatedSize — presumably it computes
/// size() on demand instead of maintaining a counter on every insert (confirm in
/// Common/HashTable/FixedHashMap.h).
template <typename Key, typename Mapped>
using FixedHashMapWithCalculatedSize = FixedHashMap<
    Key,
    Mapped,
    FixedHashMapCell<Key, Mapped>,
    FixedHashTableCalculatedSize<FixedHashMapCell<Key, Mapped>>>;

/// Same as `baseline`, but with the calculated-size map variant.
Float NO_INLINE calculated_size(const PODArray<UInt8> & keys, const PODArray<Float> & values)
{
    Arena arena;
    FixedHashMapWithCalculatedSize<UInt8, StatePtr> map;

    size_t size = keys.size();
    for (size_t i = 0; i < size; ++i)
    {
        StatePtr & place = map[keys[i]];
        /// Lazily create the state on the first occurrence of a key.
        if (unlikely(!place))
            place = new (arena.alloc<State>()) State();

        place->add(values[i]);
    }

    return map[0] ? map[0]->result() : 0;
}
/// Combination of the implicit-zero and calculated-size map optimizations.
Float NO_INLINE implicit_zero_and_calculated_size(const PODArray<UInt8> & keys, const PODArray<Float> & values)
{
    Arena arena;
    FixedImplicitZeroHashMapWithCalculatedSize<UInt8, StatePtr> map;

    const UInt8 * key = keys.data();
    const UInt8 * const key_end = key + keys.size();
    const Float * value = values.data();

    while (key != key_end)
    {
        StatePtr & state = map[*key];
        if (unlikely(!state))
            state = new (arena.alloc<State>()) State();

        state->add(*value);
        ++key;
        ++value;
    }

    return map[0] ? map[0]->result() : 0;
}
/// Pre-create a state for every possible key up front,
/// so the hot loop carries no "is it initialized" branch.
Float NO_INLINE init_out_of_the_loop(const PODArray<UInt8> & keys, const PODArray<Float> & values)
{
    Arena arena;
    FixedImplicitZeroHashMapWithCalculatedSize<UInt8, StatePtr> map;

    for (size_t k = 0; k < 256; ++k)
        map[k] = new (arena.alloc<State>()) State();

    size_t rows = keys.size();
    for (size_t row = 0; row != rows; ++row)
        map[keys[row]]->add(values[row]);

    return map[0] ? map[0]->result() : 0;
}
/// States are stored inline in the hash-table cells:
/// no arena, no pointer chasing, no lazy-init branch.
Float NO_INLINE embedded_states(const PODArray<UInt8> & keys, const PODArray<Float> & values)
{
    FixedImplicitZeroHashMapWithCalculatedSize<UInt8, State> map;

    size_t rows = keys.size();
    for (size_t row = 0; row != rows; ++row)
        map[keys[row]].add(values[row]);

    return map[0].result();
}
/// Minimal data structure: a plain 256-entry array of state pointers,
/// indexed directly by the key byte.
Float NO_INLINE simple_lookup_table(const PODArray<UInt8> & keys, const PODArray<Float> & values)
{
    Arena arena;
    StatePtr map[256]{};

    size_t rows = keys.size();
    size_t row = 0;
    while (row != rows)
    {
        StatePtr & state = map[keys[row]];
        if (unlikely(!state))
            state = new (arena.alloc<State>()) State();

        state->add(values[row]);
        ++row;
    }

    return map[0] ? map[0]->result() : 0;
}
/// The most direct solution: a plain array of 256 states, no pointers at all.
Float NO_INLINE simple_lookup_table_embedded_states(const PODArray<UInt8> & keys, const PODArray<Float> & values)
{
    State map[256]{};

    const UInt8 * key = keys.data();
    const UInt8 * const key_end = key + keys.size();
    const Float * value = values.data();

    for (; key != key_end; ++key, ++value)
        map[*key].add(*value);

    return map[0].result();
}
/// Like implicit_zero_and_calculated_size, but with the loop manually unrolled:
/// first resolve UNROLL_COUNT state pointers, then apply the values,
/// so the table lookups and the additions can overlap.
template <size_t UNROLL_COUNT>
Float NO_INLINE unrolled(const PODArray<UInt8> & keys, const PODArray<Float> & values)
{
    Arena arena;
    FixedImplicitZeroHashMapWithCalculatedSize<UInt8, StatePtr> map;

    size_t size = keys.size();
    size_t i = 0;

    size_t size_unrolled = size / UNROLL_COUNT * UNROLL_COUNT;
    for (; i < size_unrolled; i += UNROLL_COUNT)
    {
        StatePtr places[UNROLL_COUNT];
        for (size_t j = 0; j < UNROLL_COUNT; ++j)
        {
            StatePtr & place = map[keys[i + j]];
            if (unlikely(!place))
                place = new (arena.alloc<State>()) State();

            places[j] = place;
        }

        for (size_t j = 0; j < UNROLL_COUNT; ++j)
            places[j]->add(values[i + j]);
    }

    /// Tail that does not fill a whole unroll group.
    for (; i < size; ++i)
    {
        StatePtr & place = map[keys[i]];
        if (unlikely(!place))
            place = new (arena.alloc<State>()) State();

        place->add(values[i]);
    }

    return map[0] ? map[0]->result() : 0;
}
/// Same unrolling as `unrolled`, but over a plain 256-entry pointer array.
template <size_t UNROLL_COUNT>
Float NO_INLINE simple_lookup_table_unrolled(const PODArray<UInt8> & keys, const PODArray<Float> & values)
{
    Arena arena;
    StatePtr map[256]{};

    size_t size = keys.size();
    size_t i = 0;

    size_t size_unrolled = size / UNROLL_COUNT * UNROLL_COUNT;
    for (; i < size_unrolled; i += UNROLL_COUNT)
    {
        /// Resolve all places of the group first, then apply the values.
        StatePtr places[UNROLL_COUNT];
        for (size_t j = 0; j < UNROLL_COUNT; ++j)
        {
            StatePtr & place = map[keys[i + j]];
            if (unlikely(!place))
                place = new (arena.alloc<State>()) State();

            places[j] = place;
        }

        for (size_t j = 0; j < UNROLL_COUNT; ++j)
            places[j]->add(values[i + j]);
    }

    /// Tail that does not fill a whole unroll group.
    for (; i < size; ++i)
    {
        StatePtr & place = map[keys[i]];
        if (unlikely(!place))
            place = new (arena.alloc<State>()) State();

        place->add(values[i]);
    }

    return map[0] ? map[0]->result() : 0;
}
/// Unrolled variant with states embedded in the map cells:
/// pointers are taken into the cells themselves, no lazy-init branch needed.
template <size_t UNROLL_COUNT>
Float NO_INLINE embedded_states_unrolled(const PODArray<UInt8> & keys, const PODArray<Float> & values)
{
    FixedImplicitZeroHashMapWithCalculatedSize<UInt8, State> map;

    size_t size = keys.size();
    size_t i = 0;

    size_t size_unrolled = size / UNROLL_COUNT * UNROLL_COUNT;
    for (; i < size_unrolled; i += UNROLL_COUNT)
    {
        StatePtr places[UNROLL_COUNT];
        for (size_t j = 0; j < UNROLL_COUNT; ++j)
            places[j] = &map[keys[i + j]];

        for (size_t j = 0; j < UNROLL_COUNT; ++j)
            places[j]->add(values[i + j]);
    }

    /// Tail that does not fill a whole unroll group.
    for (; i < size; ++i)
    {
        State & place = map[keys[i]];
        place.add(values[i]);
    }

    return map[0].result();
}
/// Unrolled variant over a plain array of embedded states.
template <size_t UNROLL_COUNT>
Float NO_INLINE simple_lookup_table_embedded_states_unrolled(const PODArray<UInt8> & keys, const PODArray<Float> & values)
{
    State map[256]{};

    size_t size = keys.size();
    size_t i = 0;

    size_t size_unrolled = size / UNROLL_COUNT * UNROLL_COUNT;
    for (; i < size_unrolled; i += UNROLL_COUNT)
    {
        StatePtr places[UNROLL_COUNT];
        for (size_t j = 0; j < UNROLL_COUNT; ++j)
            places[j] = &map[keys[i + j]];

        for (size_t j = 0; j < UNROLL_COUNT; ++j)
            places[j]->add(values[i + j]);
    }

    /// Tail that does not fill a whole unroll group.
    for (; i < size; ++i)
    {
        State & place = map[keys[i]];
        place.add(values[i]);
    }

    return map[0].result();
}
/// Bucket-sort approach: build a histogram of keys, group the row indices by key,
/// then update each state sequentially over all of its rows (good locality per state).
template <size_t UNROLL_COUNT>
Float NO_INLINE microsort(const PODArray<UInt8> & keys, const PODArray<Float> & values)
{
    State map[256]{};

    size_t size = keys.size();

    /// Calculate histograms of keys.
    using CountType = UInt32;

    static constexpr size_t HISTOGRAM_SIZE = 256;

    /// UNROLL_COUNT independent histograms so the counting loop has no
    /// dependent increments within a group.
    CountType count[HISTOGRAM_SIZE * UNROLL_COUNT]{};
    size_t unrolled_size = size / UNROLL_COUNT * UNROLL_COUNT;

    for (const UInt8 * elem = keys.data(); elem < keys.data() + unrolled_size; elem += UNROLL_COUNT)
        for (size_t i = 0; i < UNROLL_COUNT; ++i)
            ++count[i * HISTOGRAM_SIZE + elem[i]];

    /// Tail rows go into the first histogram.
    for (const UInt8 * elem = keys.data() + unrolled_size; elem < keys.data() + size; ++elem)
        ++count[*elem];

    /// Merge the per-lane histograms into the first one.
    for (size_t i = 0; i < HISTOGRAM_SIZE; ++i)
        for (size_t j = 1; j < UNROLL_COUNT; ++j)
            count[i] += count[j * HISTOGRAM_SIZE + i];

    /// Row indices in a batch for each key.
    PODArray<UInt32> indices(size);
    UInt32 * positions[HISTOGRAM_SIZE];
    positions[0] = indices.data();

    /// Prefix sums: positions[k] points to the start of key k's index range.
    for (size_t i = 1; i < HISTOGRAM_SIZE; ++i)
        positions[i] = positions[i - 1] + count[i - 1];

    /// Scatter row indices into their key's range; positions[k] ends up
    /// pointing one past the last index of key k.
    for (size_t i = 0; i < size; ++i)
        *positions[keys[i]]++ = i;

    /// Update states.
    UInt32 * idx = indices.data();
    for (size_t i = 0; i < HISTOGRAM_SIZE; ++i)
        for (; idx < positions[i]; ++idx)
            map[i].add(values[*idx]);

    return map[0].result();
}
/// Buffered approach: values are staged into small per-key buffers (sized to fit
/// in L1d), and a whole buffer is flushed into its state with addBatch when full.
Float NO_INLINE buffered(const PODArray<UInt8> & keys, const PODArray<Float> & values)
{
    State map[256]{};

    static constexpr size_t BUF_SIZE = 16384 / 256 / sizeof(Float); /// Should fit in L1d.

    Float buffers[256 * BUF_SIZE];
    Float * ptrs[256];

    /// ptrs[k] is the current write position inside key k's buffer.
    for (size_t i = 0; i < 256; ++i)
        ptrs[i] = &buffers[i * BUF_SIZE];

    size_t size = keys.size();
    const auto * key = keys.data();
    const auto * key_end = key + size;
    const auto * value = values.data();

    while (key < key_end)
    {
        *ptrs[*key] = *value;

        if (++ptrs[*key] == &buffers[(*key + 1) * BUF_SIZE]) /// Calculation is better than L1d load of cached end pointer.
        {
            /// Buffer full: flush it into the state and rewind the write pointer.
            ptrs[*key] -= BUF_SIZE;
            map[*key].addBatch<BUF_SIZE>(ptrs[*key], BUF_SIZE);
        }

        ++key;
        ++value;
    }

    /// Flush the partially-filled buffers.
    for (size_t i = 0; i < 256; ++i)
        map[i].addBatch<4>(&buffers[i * BUF_SIZE], ptrs[i] - &buffers[i * BUF_SIZE]);

    return map[0].result();
}
/// Full unrolling with UNROLL_COUNT independent state tables
/// (each lane of a group writes into its own table), merged at the end.
template <size_t UNROLL_COUNT>
Float NO_INLINE really_unrolled(const PODArray<UInt8> & keys, const PODArray<Float> & values)
{
    State map[256 * UNROLL_COUNT]{};

    size_t size = keys.size();
    size_t i = 0;

    size_t size_unrolled = size / UNROLL_COUNT * UNROLL_COUNT;
    for (; i < size_unrolled; i += UNROLL_COUNT)
        for (size_t j = 0; j < UNROLL_COUNT; ++j)
            map[256 * j + keys[i + j]].add(values[i + j]);

    /// Merge the per-lane tables into the first one.
    for (size_t key = 0; key < 256; ++key)
        for (size_t j = 1; j < UNROLL_COUNT; ++j)
            map[key].merge(map[256 * j + key]);

    /// Tail rows go directly into the first table.
    for (; i < size; ++i)
        map[keys[i]].add(values[i]);

    return map[0].result();
}
/** Entry point.
  * argv[1] — number of rows to aggregate (default 1000000000).
  * argv[2] — implementation variant to run (default 1; see the switch below).
  *
  * Variant numbering: single-digit cases select a basic implementation;
  * cases of the form <family><unroll> (e.g. 54 = family 5, unroll 4)
  * select an unrolled variant.
  */
int main(int argc, char ** argv)
{
    size_t size = argc > 1 ? std::stoull(argv[1]) : 1000000000;
    size_t variant = argc > 2 ? std::stoull(argv[2]) : 1;

    PODArray<UInt8> keys(size);
    PODArray<Float> values(size);

    /// Fill source data
    for (size_t i = 0; i < size; ++i)
    {
        keys[i] = __builtin_ctz(i + 1); /// Make keys to have just slightly more realistic distribution.
        values[i] = 1234.5; /// The distribution of values does not affect execution speed.
    }

    /// Aggregate
    Stopwatch watch;
    Float res{};

    switch (variant)
    {
        case 0: res = baseline(keys, values); break;
        case 1: res = implicit_zero(keys, values); break;
        case 2: res = calculated_size(keys, values); break;
        case 3: res = implicit_zero_and_calculated_size(keys, values); break;
        case 4: res = init_out_of_the_loop(keys, values); break;
        case 5: res = embedded_states(keys, values); break;
        case 6: res = simple_lookup_table(keys, values); break;
        case 7: res = simple_lookup_table_embedded_states(keys, values); break;
        case 8: res = microsort<1>(keys, values); break;
        case 9: res = baseline_baseline(keys, values); break;
        case 10: res = buffered(keys, values); break;
        case 11: res = really_unrolled<1>(keys, values); break;

        case 32: res = unrolled<2>(keys, values); break;
        case 34: res = unrolled<4>(keys, values); break;
        case 36: res = unrolled<6>(keys, values); break;
        case 38: res = unrolled<8>(keys, values); break;
        case 316: res = unrolled<16>(keys, values); break;

        case 52: res = embedded_states_unrolled<2>(keys, values); break;
        case 54: res = embedded_states_unrolled<4>(keys, values); break;
        case 56: res = embedded_states_unrolled<6>(keys, values); break;
        case 58: res = embedded_states_unrolled<8>(keys, values); break;
        case 516: res = embedded_states_unrolled<16>(keys, values); break;

        case 62: res = simple_lookup_table_unrolled<2>(keys, values); break;
        case 64: res = simple_lookup_table_unrolled<4>(keys, values); break;
        case 66: res = simple_lookup_table_unrolled<6>(keys, values); break;
        case 68: res = simple_lookup_table_unrolled<8>(keys, values); break;
        case 616: res = simple_lookup_table_unrolled<16>(keys, values); break;

        case 72: res = simple_lookup_table_embedded_states_unrolled<2>(keys, values); break;
        case 74: res = simple_lookup_table_embedded_states_unrolled<4>(keys, values); break;
        case 76: res = simple_lookup_table_embedded_states_unrolled<6>(keys, values); break;
        case 78: res = simple_lookup_table_embedded_states_unrolled<8>(keys, values); break;
        case 716: res = simple_lookup_table_embedded_states_unrolled<16>(keys, values); break;

        case 82: res = microsort<2>(keys, values); break;
        case 84: res = microsort<4>(keys, values); break;
        case 86: res = microsort<6>(keys, values); break;
        case 88: res = microsort<8>(keys, values); break;
        case 816: res = microsort<16>(keys, values); break;

        case 112: res = really_unrolled<2>(keys, values); break;
        case 114: res = really_unrolled<4>(keys, values); break;
        /// Fixed: was really_unrolled<5>, inconsistent with every other
        /// X6 -> unroll 6 case in this switch (36, 56, 66, 76, 86).
        case 116: res = really_unrolled<6>(keys, values); break;
        case 118: res = really_unrolled<8>(keys, values); break;
        case 1116: res = really_unrolled<16>(keys, values); break;

        default: break; /// Unknown variant: report zero work done.
    }
    watch.stop();

    fmt::print("Aggregated (res = {}) in {} sec., {} million rows/sec., {} MiB/sec.\n",
        res,
        watch.elapsedSeconds(),
        size_t(size / watch.elapsedSeconds() / 1000000),
        size_t(size * (sizeof(Float) + sizeof(UInt8)) / watch.elapsedSeconds() / 1000000));

    return 0;
}

View File

@ -1,27 +1,27 @@
6931467.646716369
6931468.33986355
6931469.0330107305
6931469.726157911
6931470.419305092
6931471.112452272
3465734.169931768
3465734.8630789486
3465735.5562261306
3465736.24937331
3465736.94252049
3465735.209652544
3465735.209652544
3465735.5562261483
3465735.9027997246
3465735.902799725
3465734.516505364
3465735.209652544
3465735.209652544
3465735.9027997246
3465735.902799725
3465736.595946905
3465735.2096525617
3465735.9027997428
6931467.646716
6931468.339864
6931469.033011
6931469.726158
6931470.419305
6931471.112452
3465734.169932
3465734.863079
3465735.556226
3465736.249373
3465736.94252
3465735.209653
3465735.209653
3465735.556226
3465735.9028
3465735.9028
3465734.516505
3465735.209653
3465735.209653
3465735.9028
3465735.9028
3465736.595947
3465735.209653
3465735.9028
SELECT max(log(2) * number) AS k
FROM numbers(10000000)
GROUP BY
@ -51,30 +51,30 @@ GROUP BY
(number % 2) % 3,
number % 2
ORDER BY k ASC
6931467.646716369
6931468.33986355
6931469.0330107305
6931469.726157911
6931470.419305092
6931471.112452272
3465734.169931768
3465734.8630789486
3465735.5562261306
3465736.24937331
3465736.94252049
3465735.209652544
3465735.209652544
3465735.5562261483
3465735.9027997246
3465735.902799725
3465734.516505364
3465735.209652544
3465735.209652544
3465735.9027997246
3465735.902799725
3465736.595946905
3465735.2096525617
3465735.9027997428
6931467.646716
6931468.339864
6931469.033011
6931469.726158
6931470.419305
6931471.112452
3465734.169932
3465734.863079
3465735.556226
3465736.249373
3465736.94252
3465735.209653
3465735.209653
3465735.556226
3465735.9028
3465735.9028
3465734.516505
3465735.209653
3465735.209653
3465735.9028
3465735.9028
3465736.595947
3465735.209653
3465735.9028
SELECT max(log(2) * number) AS k
FROM numbers(10000000)
GROUP BY

View File

@ -1,11 +1,11 @@
set optimize_group_by_function_keys = 1;
set enable_debug_queries = 1;
SELECT max(log(2) * number) AS k FROM numbers(10000000) GROUP BY number % 2, number % 3, (number % 2 + number % 3) % 2 ORDER BY k;
SELECT avg(log(2) * number) AS k FROM numbers(10000000) GROUP BY number % 5, ((number % 5) * (number % 5)) ORDER BY k;
SELECT avg(log(2) * number) AS k FROM numbers(10000000) GROUP BY (number % 2) * (number % 3), number % 3 ORDER BY k;
SELECT avg(log(2) * number) AS k FROM numbers(10000000) GROUP BY (number % 2) * (number % 3), number % 3, number % 2 ORDER BY k;
SELECT avg(log(2) * number) AS k FROM numbers(10000000) GROUP BY (number % 2) % 3, number % 2 ORDER BY k;
SELECT round(max(log(2) * number), 6) AS k FROM numbers(10000000) GROUP BY number % 2, number % 3, (number % 2 + number % 3) % 2 ORDER BY k;
SELECT round(avg(log(2) * number), 6) AS k FROM numbers(10000000) GROUP BY number % 5, ((number % 5) * (number % 5)) ORDER BY k;
SELECT round(avg(log(2) * number), 6) AS k FROM numbers(10000000) GROUP BY (number % 2) * (number % 3), number % 3 ORDER BY k;
SELECT round(avg(log(2) * number), 6) AS k FROM numbers(10000000) GROUP BY (number % 2) * (number % 3), number % 3, number % 2 ORDER BY k;
SELECT round(avg(log(2) * number), 6) AS k FROM numbers(10000000) GROUP BY (number % 2) % 3, number % 2 ORDER BY k;
analyze SELECT max(log(2) * number) AS k FROM numbers(10000000) GROUP BY number % 2, number % 3, (number % 2 + number % 3) % 2 ORDER BY k;
@ -16,11 +16,11 @@ analyze SELECT avg(log(2) * number) AS k FROM numbers(10000000) GROUP BY (number
set optimize_group_by_function_keys = 0;
SELECT max(log(2) * number) AS k FROM numbers(10000000) GROUP BY number % 2, number % 3, (number % 2 + number % 3) % 2 ORDER BY k;
SELECT avg(log(2) * number) AS k FROM numbers(10000000) GROUP BY number % 5, ((number % 5) * (number % 5)) ORDER BY k;
SELECT avg(log(2) * number) AS k FROM numbers(10000000) GROUP BY (number % 2) * (number % 3), number % 3 ORDER BY k;
SELECT avg(log(2) * number) AS k FROM numbers(10000000) GROUP BY (number % 2) * (number % 3), number % 3, number % 2 ORDER BY k;
SELECT avg(log(2) * number) AS k FROM numbers(10000000) GROUP BY (number % 2) % 3, number % 2 ORDER BY k;
SELECT round(max(log(2) * number), 6) AS k FROM numbers(10000000) GROUP BY number % 2, number % 3, (number % 2 + number % 3) % 2 ORDER BY k;
SELECT round(avg(log(2) * number), 6) AS k FROM numbers(10000000) GROUP BY number % 5, ((number % 5) * (number % 5)) ORDER BY k;
SELECT round(avg(log(2) * number), 6) AS k FROM numbers(10000000) GROUP BY (number % 2) * (number % 3), number % 3 ORDER BY k;
SELECT round(avg(log(2) * number), 6) AS k FROM numbers(10000000) GROUP BY (number % 2) * (number % 3), number % 3, number % 2 ORDER BY k;
SELECT round(avg(log(2) * number), 6) AS k FROM numbers(10000000) GROUP BY (number % 2) % 3, number % 2 ORDER BY k;
analyze SELECT max(log(2) * number) AS k FROM numbers(10000000) GROUP BY number % 2, number % 3, (number % 2 + number % 3) % 2 ORDER BY k;
analyze SELECT avg(log(2) * number) AS k FROM numbers(10000000) GROUP BY number % 5, ((number % 5) * (number % 5)) ORDER BY k;

View File

@ -1,9 +1,9 @@
3465735.9027997246
3465735.902799725
3465736.595946905
3465734.169931768
3465734.8630789486
3465735.5562261306
3465735.9028
3465735.9028
3465736.595947
3465734.169932
3465734.863079
3465735.556226
0
1
4
@ -24,12 +24,12 @@ FROM numbers(10000000)
WHERE ((number % 5) * (number % 5)) < 5
GROUP BY number % 5
ORDER BY k ASC
3465735.9027997246
3465735.902799725
3465736.595946905
3465734.169931768
3465734.8630789486
3465735.5562261306
3465735.9028
3465735.9028
3465736.595947
3465734.169932
3465734.863079
3465735.556226
0
1
4

View File

@ -1,8 +1,8 @@
set optimize_group_by_function_keys = 1;
set enable_debug_queries = 1;
SELECT avg(log(2) * number) AS k FROM numbers(10000000) GROUP BY (number % 2) * (number % 3), number % 3, number % 2 HAVING avg(log(2) * number) > 3465735.3 ORDER BY k;
SELECT avg(log(2) * number) AS k FROM numbers(10000000) GROUP BY number % 5, ((number % 5) * (number % 5)) HAVING ((number % 5) * (number % 5)) < 5 ORDER BY k;
SELECT round(avg(log(2) * number), 6) AS k FROM numbers(10000000) GROUP BY (number % 2) * (number % 3), number % 3, number % 2 HAVING avg(log(2) * number) > 3465735.3 ORDER BY k;
SELECT round(avg(log(2) * number), 6) AS k FROM numbers(10000000) GROUP BY number % 5, ((number % 5) * (number % 5)) HAVING ((number % 5) * (number % 5)) < 5 ORDER BY k;
SELECT (number % 5) * (number % 5) AS k FROM numbers(10000000) GROUP BY number % 5, ((number % 5) * (number % 5)) HAVING ((number % 5) * (number % 5)) < 5 ORDER BY k;
@ -12,8 +12,8 @@ analyze SELECT (number % 5) * (number % 5) AS k FROM numbers(10000000) GROUP BY
set optimize_group_by_function_keys = 0;
SELECT avg(log(2) * number) AS k FROM numbers(10000000) GROUP BY (number % 2) * (number % 3), number % 3, number % 2 HAVING avg(log(2) * number) > 3465735.3 ORDER BY k;
SELECT avg(log(2) * number) AS k FROM numbers(10000000) GROUP BY number % 5, ((number % 5) * (number % 5)) HAVING ((number % 5) * (number % 5)) < 5 ORDER BY k;
SELECT round(avg(log(2) * number), 6) AS k FROM numbers(10000000) GROUP BY (number % 2) * (number % 3), number % 3, number % 2 HAVING avg(log(2) * number) > 3465735.3 ORDER BY k;
SELECT round(avg(log(2) * number), 6) AS k FROM numbers(10000000) GROUP BY number % 5, ((number % 5) * (number % 5)) HAVING ((number % 5) * (number % 5)) < 5 ORDER BY k;
SELECT (number % 5) * (number % 5) AS k FROM numbers(10000000) GROUP BY number % 5, ((number % 5) * (number % 5)) HAVING ((number % 5) * (number % 5)) < 5 ORDER BY k;
analyze SELECT avg(log(2) * number) AS k FROM numbers(10000000) GROUP BY (number % 2) * (number % 3), number % 3, number % 2 HAVING avg(log(2) * number) > 3465735.3 ORDER BY k;