mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-09-20 00:30:49 +00:00
Merge pull request #50942 from ClickHouse/fix-ann-page
Fixes to ANN docs page
This commit is contained in:
commit
22670aebf8
@ -54,7 +54,7 @@ CREATE TABLE table
|
||||
(
|
||||
`id` Int64,
|
||||
`vectors` Array(Float32),
|
||||
INDEX <ann_index_name> vectors TYPE <ann_index_type>(<ann_index_parameters>) [GRANULARITY <N>]
|
||||
INDEX [ann_index_name vectors TYPE [ann_index_type]([ann_index_parameters]) [GRANULARITY [N]]
|
||||
)
|
||||
ENGINE = MergeTree
|
||||
ORDER BY id;
|
||||
@ -67,7 +67,7 @@ CREATE TABLE table
|
||||
(
|
||||
`id` Int64,
|
||||
`vectors` Tuple(Float32[, Float32[, ...]]),
|
||||
INDEX <ann_index_name> vectors TYPE <ann_index_type>(<ann_index_parameters>) [GRANULARITY <N>]
|
||||
INDEX [ann_index_name] vectors TYPE [ann_index_type]([ann_index_parameters]) [GRANULARITY [N]]
|
||||
)
|
||||
ENGINE = MergeTree
|
||||
ORDER BY id;
|
||||
@ -114,7 +114,7 @@ without `LIMIT` clause cannot utilize ANN indexes. Also ANN indexes are only use
|
||||
approximate neighbor search.
|
||||
|
||||
**Differences to Skip Indexes** Similar to regular [skip indexes](https://clickhouse.com/docs/en/optimize/skipping-indexes), ANN indexes are
|
||||
constructed over granules and each indexed block consists of `GRANULARITY = <N>`-many granules (`<N>` = 1 by default for normal skip
|
||||
constructed over granules and each indexed block consists of `GRANULARITY = [N]`-many granules (`[N]` = 1 by default for normal skip
|
||||
indexes). For example, if the primary index granularity of the table is 8192 (setting `index_granularity = 8192`) and `GRANULARITY = 2`,
|
||||
then each indexed block will contain 16384 rows. However, data structures and algorithms for approximate neighborhood search (usually
|
||||
provided by external libraries) are inherently row-oriented. They store a compact representation of a set of rows and also return rows for
|
||||
@ -130,7 +130,8 @@ skip data at the granularity of index blocks.
|
||||
The `GRANULARITY` parameter determines how many ANN sub-indexes are created. Bigger `GRANULARITY` values mean fewer but larger ANN
|
||||
sub-indexes, up to the point where a column (or a column's data part) has only a single sub-index. In that case, the sub-index has a
|
||||
"global" view of all column rows and can directly return all granules of the column (part) with relevant rows (there are at most
|
||||
`LIMIT <N>`-many such granules). In a second step, ClickHouse will load these granules and identify the actually best rows by performing a
|
||||
`LIMIT [N]`-many such granules). In a second step, ClickHouse will load these granules and identify the actually best rows by performing a
|
||||
|
||||
brute-force distance calculation over all rows of the granules. With a small `GRANULARITY` value, each of the sub-indexes returns up to
|
||||
`LIMIT N`-many granules. As a result, more granules need to be loaded and post-filtered. Note that the search accuracy is with both cases
|
||||
equally good, only the processing performance differs. It is generally recommended to use a large `GRANULARITY` for ANN indexes and fall
|
||||
@ -169,7 +170,7 @@ CREATE TABLE table
|
||||
(
|
||||
id Int64,
|
||||
vectors Array(Float32),
|
||||
INDEX <ann_index_name> vectors TYPE annoy([Distance[, NumTrees]]) [GRANULARITY N]
|
||||
INDEX [ann_index_name] vectors TYPE annoy([Distance[, NumTrees]]) [GRANULARITY N]
|
||||
)
|
||||
ENGINE = MergeTree
|
||||
ORDER BY id;
|
||||
@ -182,7 +183,7 @@ CREATE TABLE table
|
||||
(
|
||||
id Int64,
|
||||
vectors Tuple(Float32[, Float32[, ...]]),
|
||||
INDEX <ann_index_name> vectors TYPE annoy([Distance[, NumTrees]]) [GRANULARITY N]
|
||||
INDEX [ann_index_name] vectors TYPE annoy([Distance[, NumTrees]]) [GRANULARITY N]
|
||||
)
|
||||
ENGINE = MergeTree
|
||||
ORDER BY id;
|
||||
|
Loading…
Reference in New Issue
Block a user