mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-10 01:25:21 +00:00
Add original and new Annoy
This commit is contained in:
parent
4a9fbdf4dc
commit
2f1213d25f
1
contrib/CMakeLists.txt
vendored
1
contrib/CMakeLists.txt
vendored
@ -90,6 +90,7 @@ add_contrib (openldap-cmake openldap)
|
||||
add_contrib (grpc-cmake grpc)
|
||||
add_contrib (msgpack-c-cmake msgpack-c)
|
||||
add_contrib (spotify-annoy-cmake spotify-annoy)
|
||||
add_contrib (annoy-cmake annoy)
|
||||
|
||||
if (ENABLE_FUZZING)
|
||||
add_contrib (libprotobuf-mutator-cmake libprotobuf-mutator)
|
||||
|
21
contrib/annoy-cmake/CMakeLists.txt
Normal file
21
contrib/annoy-cmake/CMakeLists.txt
Normal file
@ -0,0 +1,21 @@
|
||||
# Build definition for the in-tree Annoy contrib.
# Compiles the sources listed in ANNOY_SRC into the `_annoy` library target and
# exposes it to the rest of the build under the alias `ch_contrib::annoy`.
#
# Directory layout variables, all rooted at contrib/annoy inside the ClickHouse tree.
set(ANNOY_PROJECT_DIR "${ClickHouse_SOURCE_DIR}/contrib/annoy")
|
||||
set(ANNOY_SOURCE_DIR "${ANNOY_PROJECT_DIR}/src")
|
||||
set(ANNOY_INCLUDE_DIR ${ANNOY_PROJECT_DIR}/include)
|
||||
|
||||
# Header list. NOTE(review): ANNOY_HEADERS is not referenced by any command
# below in this file, so it currently has no effect on the build.
set(ANNOY_HEADERS
|
||||
${ANNOY_INCLUDE_DIR}/annoy.h
|
||||
${ANNOY_INCLUDE_DIR}/node.h
|
||||
${ANNOY_INCLUDE_DIR}/point.h
|
||||
)
|
||||
|
||||
# Translation units compiled into the library.
set(ANNOY_SRC
|
||||
${ANNOY_SOURCE_DIR}/annoy.cpp
|
||||
${ANNOY_SOURCE_DIR}/node.cpp
|
||||
${ANNOY_SOURCE_DIR}/point.cpp
|
||||
${ANNOY_SOURCE_DIR}/settings.cpp
|
||||
)
|
||||
|
||||
# No STATIC/SHARED keyword is given, so the library type is whatever CMake
# selects by default for the enclosing build.
add_library(_annoy ${ANNOY_SRC})
|
||||
# PUBLIC: the include directory is used for _annoy itself and is propagated to
# every target that links against it. SYSTEM marks it as a system include path.
target_include_directories(_annoy SYSTEM PUBLIC ${ANNOY_INCLUDE_DIR})
|
||||
|
||||
# Namespaced alias under which other targets link this library.
add_library(ch_contrib::annoy ALIAS _annoy)
|
1
contrib/annoy/Readme.md
Normal file
1
contrib/annoy/Readme.md
Normal file
@ -0,0 +1 @@
|
||||
The algorithm was inspired by Spotify's Annoy library: https://github.com/spotify/annoy
|
@ -5,6 +5,8 @@
|
||||
|
||||
#include "node.h"
|
||||
|
||||
namespace Annoy {
|
||||
|
||||
class Annoy {
|
||||
using Point = std::vector<double>;
|
||||
public:
|
||||
@ -15,4 +17,6 @@ class Annoy {
|
||||
std::shared_ptr<const std::vector<Point>> points_;
|
||||
std::vector<std::shared_ptr<Node>> trees_;
|
||||
int dim_;
|
||||
};
|
||||
};
|
||||
|
||||
};
|
@ -5,6 +5,7 @@
|
||||
#include <variant>
|
||||
#include <vector>
|
||||
|
||||
namespace Annoy {
|
||||
|
||||
struct Node {
|
||||
using Point = std::vector<double>;
|
||||
@ -35,3 +36,5 @@ struct Node {
|
||||
private:
|
||||
void GenerateLine(InnerData& inner_node_data, const std::vector<size_t>& indexes);
|
||||
};
|
||||
|
||||
};
|
@ -3,6 +3,8 @@
|
||||
#include <cmath>
|
||||
#include <vector>
|
||||
|
||||
namespace Annoy {
|
||||
|
||||
using Point = std::vector<double>;
|
||||
|
||||
double ScalarMul(const Point& first, const Point& second);
|
||||
@ -13,4 +15,6 @@ Point operator-(const Point& point);
|
||||
|
||||
Point operator-(const Point& first, const Point& second);
|
||||
|
||||
Point operator*(const Point& first, double k);
|
||||
Point operator*(const Point& first, double k);
|
||||
|
||||
};
|
@ -1,4 +0,0 @@
|
||||
#include <cstddef>
|
||||
|
||||
const size_t NUM_OF_TREES = 3;
|
||||
const size_t MAX_LEAF_NODE_SIZE = 1;
|
@ -3,13 +3,14 @@
|
||||
#include <map>
|
||||
#include <set>
|
||||
|
||||
#include "annoy.h"
|
||||
#include "settings.h"
|
||||
#include "point.h"
|
||||
#include "annoy/annoy.h"
|
||||
#include "annoy/point.h"
|
||||
|
||||
#include "settings.cpp"
|
||||
|
||||
namespace Annoy {
|
||||
|
||||
Annoy::Annoy(const std::vector<Point>& points) : points_(std::make_shared<const std::vector<Point>>(points)), trees_(NUM_OF_TREES) {
|
||||
assert(!points.empty());
|
||||
std::srand(std::time(nullptr));
|
||||
std::vector<size_t> indexes(points_->size());
|
||||
for (int i = 0; i < points_->size(); ++i) {
|
||||
@ -53,4 +54,6 @@ std::vector<Point> Annoy::FindKNN(const Point& x, size_t k) const {
|
||||
result[i] = (*points_)[candidates[i].first];
|
||||
}
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
};
|
@ -1,6 +1,8 @@
|
||||
#include "node.h"
|
||||
#include "point.h"
|
||||
#include "settings.h"
|
||||
#include "annoy/node.h"
|
||||
#include "annoy/point.h"
|
||||
#include "settings.cpp"
|
||||
|
||||
namespace Annoy {
|
||||
|
||||
const double EPS = 1e-5;
|
||||
|
||||
@ -44,3 +46,5 @@ void Node::GenerateLine(InnerData& inner_node_data, const std::vector<size_t>& i
|
||||
inner_node_data.div_line_point = ((*points)[indexes[i1]] + (*points)[indexes[i2]]) * 0.5;
|
||||
inner_node_data.div_line_norm = (*points)[indexes[i2]] - (*points)[indexes[i1]];
|
||||
}
|
||||
|
||||
};
|
@ -1,4 +1,6 @@
|
||||
#include "point.h"
|
||||
#include "annoy/point.h"
|
||||
|
||||
namespace Annoy {
|
||||
|
||||
double ScalarMul(const Point& first, const Point& second) {
|
||||
double sum = 0.;
|
||||
@ -30,4 +32,6 @@ Point operator*(const Point& point, double k) {
|
||||
result[i] *= k;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
};
|
8
contrib/annoy/src/settings.cpp
Normal file
8
contrib/annoy/src/settings.cpp
Normal file
@ -0,0 +1,8 @@
|
||||
#include <cstddef>
|
||||
|
||||
namespace Annoy {

/// Tuning constants for the Annoy index.
///
/// This file is compiled as its own translation unit and is also textually
/// included (`#include "settings.cpp"`) by other sources, so the constants
/// must keep internal linkage. Namespace-scope `constexpr` variables do.

/// Number of trees the index holds (used to size `Annoy::trees_`).
constexpr std::size_t NUM_OF_TREES = 3;

/// NOTE(review): presumably the maximum number of points kept in a leaf node
/// before it is split further — confirm against node.cpp.
constexpr std::size_t MAX_LEAF_NODE_SIZE = 1;

}  // namespace Annoy
|
@ -1,16 +1,17 @@
|
||||
cmake_minimum_required(VERSION 3.14)
|
||||
|
||||
add_library(SpotifyAnnoy INTERFACE)
|
||||
|
||||
set(SPOTIFY_ANNOY_PROJECT_DIR "${ClickHouse_SOURCE_DIR}/contrib/sporify-annoy")
|
||||
set(SPOTIFY_ANNOY_PROJECT_DIR "${ClickHouse_SOURCE_DIR}/contrib/spotify-annoy")
|
||||
set(SPOTIFY_ANNOY_SOURCE_DIR "${SPOTIFY_ANNOY_PROJECT_DIR}/src")
|
||||
set(SPOTIFY_ANNOY_INCLUDE_DIR ${SPOTIFY_ANNOY_PROJECT_DIR}/include/annoy)
|
||||
set(SPOTIFY_ANNOY_INCLUDE_DIR ${SPOTIFY_ANNOY_PROJECT_DIR}/include)
|
||||
|
||||
file(MAKE_DIRECTORY ${SPOTIFY_ANNOY_INCLUDE_DIR})
|
||||
foreach (HEADER annoylib.h kissrandom.h mman.h)
|
||||
file(COPY ${SPOTIFY_ANNOY_PROJECT_DIR}/src/${HEADER} DESTINATION ${SPOTIFY_ANNOY_INCLUDE_DIR})
|
||||
endforeach()
|
||||
set(SPOTIFY_ANNOY_HEADERS
|
||||
${SPOTIFY_ANNOY_SOURCE_DIR}/annoylib.h
|
||||
${SPOTIFY_ANNOY_SOURCE_DIR}/kissrandom.h
|
||||
)
|
||||
|
||||
target_include_directories(SpotifyAnnoy INTERFACE include/)
|
||||
set(SPOTIFY_ANNOY_SRC
|
||||
${SPOTIFY_ANNOY_SOURCE_DIR}/mman.h
|
||||
)
|
||||
|
||||
add_library(ch::contrib::spotify-annoy ALIAS SpotifyAnnoy)
|
||||
add_library(_spotify_annoy ${SPOTIFY_ANNOY_SRC})
|
||||
target_include_directories(_spotify_annoy SYSTEM PUBLIC ${SPOTIFY_ANNOY_SOURCE_DIR})
|
||||
|
||||
add_library(ch_contrib::spotify-annoy ALIAS _spotify_annoy)
|
||||
|
@ -523,6 +523,7 @@ endif()
|
||||
|
||||
dbms_target_link_libraries(PUBLIC ch_contrib::consistent_hashing)
|
||||
dbms_target_link_libraries(PUBLIC ch_contrib::spotify-annoy)
|
||||
dbms_target_link_libraries(PUBLIC ch_contrib::annoy)
|
||||
|
||||
include ("${ClickHouse_SOURCE_DIR}/cmake/add_check.cmake")
|
||||
|
||||
|
@ -1,21 +0,0 @@
|
||||
#include <Storages/MergeTree/MergeTreeIndexSimpleHnsw.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int LOGICAL_ERROR;
|
||||
}
|
||||
|
||||
|
||||
MergeTreeIndexGranuleSimpleHnsw::MergeTreeIndexGranuleSimpleHnsw(const String & index_name_, const Block & index_sample_block_)
|
||||
: index_name(index_name_)
|
||||
, index_sample_block(index_sample_block_)
|
||||
{}
|
||||
|
||||
|
||||
void MergeTreeIndexGranuleMinMax::serializeBinary(WriteBuffer & /*ostr*/) const{}
|
||||
|
||||
void MergeTreeIndexGranuleMinMax::deserializeBinary(ReadBuffer & /*istr*/, MergeTreeIndexVersion /*version*/){}
|
||||
}
|
@ -1,31 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <Storages/MergeTree/MergeTreeIndices.h>
|
||||
#include <Storages/MergeTree/MergeTreeData.h>
|
||||
#include <Storages/MergeTree/KeyCondition.h>
|
||||
#include "Storages/MergeTree/MergeTreeIndexMinMax.h"
|
||||
|
||||
#include <memory>
|
||||
#include "object.h"
|
||||
|
||||
#include <spotify-annoy>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
struct MergeTreeIndexGranuleSimpleSpotifyAnnoy final : public IMergeTreeIndexGranule
|
||||
{
|
||||
MergeTreeIndexGranuleSimpleSpotifyAnnoy(const String & index_name_, const Block & index_sample_block_);
|
||||
~MergeTreeIndexGranuleSimpleSpotifyAnnoy() override = default;
|
||||
|
||||
void serializeBinary(WriteBuffer & ostr) const override;
|
||||
void deserializeBinary(ReadBuffer & istr, MergeTreeIndexVersion version) override;
|
||||
|
||||
bool empty() const override { return true; }
|
||||
|
||||
String index_name;
|
||||
Block index_sample_block;
|
||||
similarity::ObjectVector batch_data;
|
||||
};
|
||||
|
||||
}
|
Loading…
Reference in New Issue
Block a user