mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-28 18:42:26 +00:00
part besides reader
This commit is contained in:
parent
f14b9ec2af
commit
0914c1bb1e
23
src/Storages/MergeTree/IMergeTreeIndexReturnIdCondition.h
Normal file
23
src/Storages/MergeTree/IMergeTreeIndexReturnIdCondition.h
Normal file
@ -0,0 +1,23 @@
|
||||
#pragma once
|
||||
|
||||
#include <Storages/MergeTree/IMergeTreeIndices.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
class IMergeTreeIndexReturnIdCondition : public IMergeTreeIndexCondition {
|
||||
public:
|
||||
virtual ~IMergeTreeIndexReturnIdCondition() override = default;
|
||||
|
||||
virtual bool alwaysUnknownOrTrue() const override = 0;
|
||||
|
||||
virtual bool mayBeTrueOnGranule(MergeTreeIndexGranulePtr granule) const override = 0;
|
||||
|
||||
virtual std::vector<int32_t> returnIdRecords(MergeTreeIndexGranulePtr granule) const = 0;
|
||||
};
|
||||
|
||||
|
||||
/// Shared-ownership pointer to an IMergeTreeIndexReturnIdCondition.
using MergeTreeIndexReturnIdConditionPtr = std::shared_ptr<IMergeTreeIndexReturnIdCondition>;
|
||||
/// A list of shared pointers to IMergeTreeIndexReturnIdCondition objects.
using MergeTreeIndexReturnIdConditions = std::vector<MergeTreeIndexReturnIdConditionPtr>;
|
||||
|
||||
}
|
15
src/Storages/MergeTree/MarkRangeSelective.h
Normal file
15
src/Storages/MergeTree/MarkRangeSelective.h
Normal file
@ -0,0 +1,15 @@
|
||||
#pragma once

#include <cstddef>
#include <utility>
#include <vector>

#include <Storages/MergeTree/MarkRange.h>

namespace DB
{

/** A MarkRange extended with a list of selected positions.
  *
  * Derives from MarkRange so that the constructors below can initialise the range part
  * through the base-class initializer.
  * NOTE(review): `selected` presumably holds the record ids reported by an index condition
  * for this range - confirm against the code that fills and reads it.
  */
struct MarkRangeSelective : MarkRange
{
    MarkRangeSelective() = default;

    /// Copies the list of selected positions.
    MarkRangeSelective(MarkRange range, const std::vector<size_t> & selected_)
        : MarkRange(range), selected(selected_)
    {
    }

    /// Takes over the list of selected positions without copying it.
    MarkRangeSelective(MarkRange range, std::vector<size_t> && selected_)
        : MarkRange(range), selected(std::move(selected_))
    {
    }

    std::vector<size_t> selected;
};

}
|
@ -41,6 +41,9 @@
|
||||
#include <Storages/MergeTree/StorageFromMergeTreeDataPart.h>
|
||||
#include <IO/WriteBufferFromOStream.h>
|
||||
|
||||
#include <Storages/MergeTree/IMergeTreeIndexReturnIdCondition.h>
|
||||
#include <Storages/MergeTree/MarkRangeSelective.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
@ -1545,6 +1548,8 @@ MarkRanges MergeTreeDataSelectExecutor::filterMarksUsingIndex(
|
||||
|
||||
MarkRanges res;
|
||||
|
||||
auto return_id_condition = dynamic_cast<MergeTreeIndexReturnIdConditionPtr>(condition);
|
||||
|
||||
/// Some granules can cover two or more ranges,
|
||||
/// this variable is stored to avoid reading the same granule twice.
|
||||
MergeTreeIndexGranulePtr granule = nullptr;
|
||||
@ -1573,6 +1578,10 @@ MarkRanges MergeTreeDataSelectExecutor::filterMarksUsingIndex(
|
||||
continue;
|
||||
}
|
||||
|
||||
if (return_id_condition) {
|
||||
res.push_back(MarkRangeSelected(data_range, return_id_condition->returnIdRecords(granule)));
|
||||
}
|
||||
|
||||
if (res.empty() || res.back().end - data_range.begin > min_marks_for_seek)
|
||||
res.push_back(data_range);
|
||||
else
|
||||
|
@ -28,6 +28,8 @@ namespace Annoy
|
||||
const int NUM_OF_TREES = 20;
|
||||
const int DIMENSION = 512;
|
||||
|
||||
/// Number of nearest neighbours that MergeTreeIndexConditionAnnoy::returnIdRecords requests
/// from the Annoy index; the same value is used to pre-size its result vector.
const int32_t LIMIT = 10;
|
||||
|
||||
template<typename Dist>
|
||||
void AnnoyIndexSerialize<Dist>::serialize(WriteBuffer& ostr) const
|
||||
{
|
||||
@ -197,6 +199,27 @@ bool MergeTreeIndexConditionAnnoy::mayBeTrueOnGranule(MergeTreeIndexGranulePtr i
|
||||
return dist[0] < min_distance;
|
||||
}
|
||||
|
||||
/// Queries the Annoy index stored in `idx_granule` for at most LIMIT nearest neighbours of the
/// target vector of the parsed expression and returns the item ids reported by the index.
std::vector<int32_t> MergeTreeIndexConditionAnnoy::returnIdRecords(MergeTreeIndexGranulePtr idx_granule) const
{
    // TODO: Change asserts to exceptions
    assert(expression.has_value());

    /// Both casts yield nullptr if the granule (or the index inside it) has an unexpected type.
    const auto granule = std::dynamic_pointer_cast<MergeTreeIndexGranuleAnnoy>(idx_granule);
    assert(granule != nullptr);
    const auto annoy = std::dynamic_pointer_cast<Annoy::AnnoyIndexSerialize<>>(granule->index_base);
    assert(annoy != nullptr);

    /// The target vector is only read, so bind it by reference instead of copying it.
    const auto & target_vec = expression.value().target;

    std::vector<int32_t> items;
    items.reserve(LIMIT);

    /// Arguments: query vector, number of nearest neighbours to return,
    /// upper limit on the size of the internal search queue (search_k),
    /// output vector for the ids, and nullptr because distances are not needed.
    annoy->get_nns_by_vector(target_vec.data(), LIMIT, 200, &items, nullptr);
    return items;
}
|
||||
|
||||
|
||||
bool MergeTreeIndexConditionAnnoy::alwaysUnknownOrTrue() const
|
||||
{
|
||||
return !expression.has_value();
|
||||
|
@ -4,6 +4,8 @@
|
||||
#include <Storages/MergeTree/MergeTreeData.h>
|
||||
#include <Storages/MergeTree/KeyCondition.h>
|
||||
|
||||
#include <Storages/MergeTree/IMergeTreeIndexReturnIdCondition.h>
|
||||
|
||||
#include <annoylib.h>
|
||||
#include <kissrandom.h>
|
||||
|
||||
@ -69,7 +71,7 @@ struct MergeTreeIndexAggregatorAnnoy final : IMergeTreeIndexAggregator
|
||||
};
|
||||
|
||||
|
||||
class MergeTreeIndexConditionAnnoy final : public IMergeTreeIndexCondition
|
||||
class MergeTreeIndexConditionAnnoy final : public IMergeTreeIndexReturnIdCondition
|
||||
{
|
||||
public:
|
||||
MergeTreeIndexConditionAnnoy(
|
||||
@ -81,6 +83,8 @@ public:
|
||||
|
||||
bool mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx_granule) const override;
|
||||
|
||||
std::vector<int32_t> returnIdRecords(MergeTreeIndexGranulePtr granule) const override;
|
||||
|
||||
~MergeTreeIndexConditionAnnoy() override = default;
|
||||
private:
|
||||
// Type of the vector to use as a target in the distance function
|
||||
|
Loading…
Reference in New Issue
Block a user