ClickHouse/src/Interpreters/RowRefs.h

166 lines
4.1 KiB
C++
Raw Normal View History

2019-03-30 21:30:21 +00:00
#pragma once
#include <algorithm>
#include <cassert>
2019-04-02 18:50:35 +00:00
#include <list>
#include <mutex>
#include <optional>
#include <variant>
2022-01-30 19:49:48 +00:00
2022-02-04 17:02:41 +00:00
#include <Columns/ColumnDecimal.h>
#include <Columns/ColumnVector.h>
#include <Columns/IColumn.h>
2022-07-29 16:30:50 +00:00
#include <Core/Joins.h>
#include <base/sort.h>
#include <Common/Arena.h>
2022-01-30 19:49:48 +00:00
2019-03-30 21:30:21 +00:00
namespace DB
{
/// Forward declaration is sufficient: the declarations visible in this header only use `const Block *`.
class Block;
/// Reference to the row in block.
2022-02-19 10:16:28 +00:00
struct RowRef
2019-03-30 21:30:21 +00:00
{
2020-04-21 19:01:34 +00:00
using SizeT = uint32_t; /// Do not use size_t cause of memory economy
2019-03-30 21:30:21 +00:00
const Block * block = nullptr;
2020-04-21 19:01:34 +00:00
SizeT row_num = 0;
2019-03-30 21:30:21 +00:00
2022-02-18 10:02:14 +00:00
RowRef() = default;
2019-03-30 21:30:21 +00:00
RowRef(const Block * block_, size_t row_num_) : block(block_), row_num(row_num_) {}
};
/// Singly linked list of references to rows. Used for ALL JOINs (non-unique JOINs).
struct RowRefList : RowRef
{
2019-05-14 14:40:43 +00:00
/// Portion of RowRefs, 16 * (MAX_SIZE + 1) bytes sized.
struct Batch
{
static constexpr size_t MAX_SIZE = 7; /// Adequate values are 3, 7, 15, 31.
2020-04-21 19:01:34 +00:00
SizeT size = 0; /// It's smaller than size_t but keeps align in Arena.
2019-05-14 14:40:43 +00:00
Batch * next;
RowRef row_refs[MAX_SIZE];
explicit Batch(Batch * parent)
2019-05-14 14:40:43 +00:00
: next(parent)
{}
bool full() const { return size == MAX_SIZE; }
Batch * insert(RowRef && row_ref, Arena & pool)
{
if (full())
{
auto * batch = pool.alloc<Batch>();
2019-05-14 14:40:43 +00:00
*batch = Batch(this);
batch->insert(std::move(row_ref), pool);
return batch;
}
row_refs[size++] = std::move(row_ref);
return this;
}
};
2019-05-14 14:39:03 +00:00
class ForwardIterator
{
public:
explicit ForwardIterator(const RowRefList * begin)
2019-05-14 14:40:43 +00:00
: root(begin)
, first(true)
, batch(root->next)
, position(0)
2019-05-14 14:39:03 +00:00
{}
2019-05-14 14:40:43 +00:00
const RowRef * operator -> () const
{
if (first)
return root;
return &batch->row_refs[position];
}
const RowRef * operator * () const
{
if (first)
return root;
return &batch->row_refs[position];
}
2019-05-14 14:40:43 +00:00
void operator ++ ()
{
if (first)
{
first = false;
return;
}
if (batch)
{
++position;
if (position >= batch->size)
{
batch = batch->next;
position = 0;
}
}
}
bool ok() const { return first || batch; }
2019-05-14 14:39:03 +00:00
private:
2019-05-14 14:40:43 +00:00
const RowRefList * root;
bool first;
Batch * batch;
size_t position;
2019-05-14 14:39:03 +00:00
};
2019-03-30 21:30:21 +00:00
RowRefList() {} /// NOLINT
2019-03-30 21:30:21 +00:00
RowRefList(const Block * block_, size_t row_num_) : RowRef(block_, row_num_) {}
2019-05-14 14:39:03 +00:00
ForwardIterator begin() const { return ForwardIterator(this); }
/// insert element after current one
void insert(RowRef && row_ref, Arena & pool)
{
2019-05-14 14:40:43 +00:00
if (!next)
{
next = pool.alloc<Batch>();
*next = Batch(nullptr);
}
next = next->insert(std::move(row_ref), pool);
2019-05-14 14:39:03 +00:00
}
private:
2019-05-14 14:40:43 +00:00
Batch * next = nullptr;
2019-03-30 21:30:21 +00:00
};
/**
 * This class is intended to have sortable data pushed into it.
 * When looking up values, the container ensures that it is sorted for log(N) lookup.
 * After calling any of the lookup methods, it is no longer allowed to insert more data, as this would invalidate the
 * references that can be returned by the lookup methods.
 */
struct SortedLookupVectorBase
{
SortedLookupVectorBase() = default;
2022-03-23 11:19:38 +00:00
virtual ~SortedLookupVectorBase() = default;
static std::optional<TypeIndex> getTypeSize(const IColumn & asof_column, size_t & type_size);
// This will be synchronized by the rwlock mutex in Join.h
virtual void insert(const IColumn &, const Block *, size_t) = 0;
// This needs to be synchronized internally
2022-03-23 11:19:38 +00:00
virtual RowRef findAsof(const IColumn &, size_t) = 0;
};
2019-03-30 21:30:21 +00:00
2022-02-18 13:05:42 +00:00
// It only contains a std::unique_ptr which is memmovable.
2022-02-18 10:02:14 +00:00
// Source: https://github.com/ClickHouse/ClickHouse/issues/4906
/// Owning pointer to an ASOF lookup container, held through the abstract SortedLookupVectorBase interface.
using AsofRowRefs = std::unique_ptr<SortedLookupVectorBase>;
2022-07-29 16:30:50 +00:00
/// Factory for AsofRowRefs, parameterized by a type index and an ASOF inequality.
/// NOTE(review): only declared here; presumably selects the SortedLookupVectorBase implementation
/// matching `type` and `inequality` - confirm against the definition.
AsofRowRefs createAsofRowRef(TypeIndex type, ASOFJoinInequality inequality);
2019-03-31 10:56:54 +00:00
}