mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-23 08:02:02 +00:00
Merge pull request #63187 from kitaisreal/recursive-cte-documentation
Added recursive CTE documentation
This commit is contained in:
commit
35b5a80e0e
@ -87,3 +87,226 @@ LIMIT 10;
|
||||
WITH test1 AS (SELECT i + 1, j + 1 FROM test1)
|
||||
SELECT * FROM test1;
|
||||
```
|
||||
|
||||
# Recursive Queries
|
||||
|
||||
The optional RECURSIVE modifier allows for a WITH query to refer to its own output. Example:
|
||||
|
||||
**Example:** Sum integers from 1 through 100
|
||||
|
||||
```sql
|
||||
WITH RECURSIVE test_table AS (
|
||||
SELECT 1 AS number
|
||||
UNION ALL
|
||||
SELECT number + 1 FROM test_table WHERE number < 100
|
||||
)
|
||||
SELECT sum(number) FROM test_table;
|
||||
```
|
||||
|
||||
``` text
|
||||
┌─sum(number)─┐
|
||||
│ 5050 │
|
||||
└─────────────┘
|
||||
```
|
||||
|
||||
The general form of a recursive `WITH` query is always a non-recursive term, then `UNION ALL`, then a recursive term, where only the recursive term can contain a reference to the query's own output. Recursive CTE query is executed as follows:
|
||||
|
||||
1. Evaluate the non-recursive term. Place result of non-recursive term query in a temporary working table.
|
||||
2. As long as the working table is not empty, repeat these steps:
|
||||
1. Evaluate the recursive term, substituting the current contents of the working table for the recursive self-reference. Place result of recursive term query in a temporary intermediate table.
|
||||
2. Replace the contents of the working table with the contents of the intermediate table, then empty the intermediate table.
|
||||
|
||||
Recursive queries are typically used to work with hierarchical or tree-structured data. For example, we can write a query that performs tree traversal:
|
||||
|
||||
**Example:** Tree traversal
|
||||
|
||||
First let's create tree table:
|
||||
|
||||
```sql
|
||||
DROP TABLE IF EXISTS tree;
|
||||
CREATE TABLE tree
|
||||
(
|
||||
id UInt64,
|
||||
parent_id Nullable(UInt64),
|
||||
data String
|
||||
) ENGINE = MergeTree ORDER BY id;
|
||||
|
||||
INSERT INTO tree VALUES (0, NULL, 'ROOT'), (1, 0, 'Child_1'), (2, 0, 'Child_2'), (3, 1, 'Child_1_1');
|
||||
```
|
||||
|
||||
We can traverse those tree with such query:
|
||||
|
||||
**Example:** Tree traversal
|
||||
```sql
|
||||
WITH RECURSIVE search_tree AS (
|
||||
SELECT id, parent_id, data
|
||||
FROM tree t
|
||||
WHERE t.id = 0
|
||||
UNION ALL
|
||||
SELECT t.id, t.parent_id, t.data
|
||||
FROM tree t, search_tree st
|
||||
WHERE t.parent_id = st.id
|
||||
)
|
||||
SELECT * FROM search_tree;
|
||||
```
|
||||
|
||||
```text
|
||||
┌─id─┬─parent_id─┬─data──────┐
|
||||
│ 0 │ ᴺᵁᴸᴸ │ ROOT │
|
||||
│ 1 │ 0 │ Child_1 │
|
||||
│ 2 │ 0 │ Child_2 │
|
||||
│ 3 │ 1 │ Child_1_1 │
|
||||
└────┴───────────┴───────────┘
|
||||
```
|
||||
|
||||
## Search order
|
||||
|
||||
To create a depth-first order, we compute for each result row an array of rows that we have already visited:
|
||||
|
||||
**Example:** Tree traversal depth-first order
|
||||
```sql
|
||||
WITH RECURSIVE search_tree AS (
|
||||
SELECT id, parent_id, data, [t.id] AS path
|
||||
FROM tree t
|
||||
WHERE t.id = 0
|
||||
UNION ALL
|
||||
SELECT t.id, t.parent_id, t.data, arrayConcat(path, [t.id])
|
||||
FROM tree t, search_tree st
|
||||
WHERE t.parent_id = st.id
|
||||
)
|
||||
SELECT * FROM search_tree ORDER BY path;
|
||||
```
|
||||
|
||||
```text
|
||||
┌─id─┬─parent_id─┬─data──────┬─path────┐
|
||||
│ 0 │ ᴺᵁᴸᴸ │ ROOT │ [0] │
|
||||
│ 1 │ 0 │ Child_1 │ [0,1] │
|
||||
│ 3 │ 1 │ Child_1_1 │ [0,1,3] │
|
||||
│ 2 │ 0 │ Child_2 │ [0,2] │
|
||||
└────┴───────────┴───────────┴─────────┘
|
||||
```
|
||||
|
||||
To create a breadth-first order, standard approach is to add column that tracks the depth of the search:
|
||||
|
||||
**Example:** Tree traversal breadth-first order
|
||||
```sql
|
||||
WITH RECURSIVE search_tree AS (
|
||||
SELECT id, parent_id, data, [t.id] AS path, toUInt64(0) AS depth
|
||||
FROM tree t
|
||||
WHERE t.id = 0
|
||||
UNION ALL
|
||||
SELECT t.id, t.parent_id, t.data, arrayConcat(path, [t.id]), depth + 1
|
||||
FROM tree t, search_tree st
|
||||
WHERE t.parent_id = st.id
|
||||
)
|
||||
SELECT * FROM search_tree ORDER BY depth;
|
||||
```
|
||||
|
||||
```text
|
||||
┌─id─┬─link─┬─data──────┬─path────┬─depth─┐
|
||||
│ 0 │ ᴺᵁᴸᴸ │ ROOT │ [0] │ 0 │
|
||||
│ 1 │ 0 │ Child_1 │ [0,1] │ 1 │
|
||||
│ 2 │ 0 │ Child_2 │ [0,2] │ 1 │
|
||||
│ 3 │ 1 │ Child_1_1 │ [0,1,3] │ 2 │
|
||||
└────┴──────┴───────────┴─────────┴───────┘
|
||||
```
|
||||
|
||||
## Cycle detection
|
||||
|
||||
First let's create graph table:
|
||||
|
||||
```sql
|
||||
DROP TABLE IF EXISTS graph;
|
||||
CREATE TABLE graph
|
||||
(
|
||||
from UInt64,
|
||||
to UInt64,
|
||||
label String
|
||||
) ENGINE = MergeTree ORDER BY (from, to);
|
||||
|
||||
INSERT INTO graph VALUES (1, 2, '1 -> 2'), (1, 3, '1 -> 3'), (2, 3, '2 -> 3'), (1, 4, '1 -> 4'), (4, 5, '4 -> 5');
|
||||
```
|
||||
|
||||
We can traverse that graph with such query:
|
||||
|
||||
**Example:** Graph traversal without cycle detection
|
||||
```sql
|
||||
WITH RECURSIVE search_graph AS (
|
||||
SELECT from, to, label FROM graph g
|
||||
UNION ALL
|
||||
SELECT g.from, g.to, g.label
|
||||
FROM graph g, search_graph sg
|
||||
WHERE g.from = sg.to
|
||||
)
|
||||
SELECT DISTINCT * FROM search_graph ORDER BY from;
|
||||
```
|
||||
```text
|
||||
┌─from─┬─to─┬─label──┐
|
||||
│ 1 │ 4 │ 1 -> 4 │
|
||||
│ 1 │ 2 │ 1 -> 2 │
|
||||
│ 1 │ 3 │ 1 -> 3 │
|
||||
│ 2 │ 3 │ 2 -> 3 │
|
||||
│ 4 │ 5 │ 4 -> 5 │
|
||||
└──────┴────┴────────┘
|
||||
```
|
||||
|
||||
But if we add cycle in that graph, previous query will fail with `Maximum recursive CTE evaluation depth` error:
|
||||
|
||||
```sql
|
||||
INSERT INTO graph VALUES (5, 1, '5 -> 1');
|
||||
|
||||
WITH RECURSIVE search_graph AS (
|
||||
SELECT from, to, label FROM graph g
|
||||
UNION ALL
|
||||
SELECT g.from, g.to, g.label
|
||||
FROM graph g, search_graph sg
|
||||
WHERE g.from = sg.to
|
||||
)
|
||||
SELECT DISTINCT * FROM search_graph ORDER BY from;
|
||||
```
|
||||
|
||||
```text
|
||||
Code: 306. DB::Exception: Received from localhost:9000. DB::Exception: Maximum recursive CTE evaluation depth (1000) exceeded, during evaluation of search_graph AS (SELECT from, to, label FROM graph AS g UNION ALL SELECT g.from, g.to, g.label FROM graph AS g, search_graph AS sg WHERE g.from = sg.to). Consider raising max_recursive_cte_evaluation_depth setting.: While executing RecursiveCTESource. (TOO_DEEP_RECURSION)
|
||||
```
|
||||
|
||||
The standard method for handling cycles is to compute an array of the already visited nodes:
|
||||
|
||||
**Example:** Graph traversal with cycle detection
|
||||
```sql
|
||||
WITH RECURSIVE search_graph AS (
|
||||
SELECT from, to, label, false AS is_cycle, [tuple(g.from, g.to)] AS path FROM graph g
|
||||
UNION ALL
|
||||
SELECT g.from, g.to, g.label, has(path, tuple(g.from, g.to)), arrayConcat(sg.path, [tuple(g.from, g.to)])
|
||||
FROM graph g, search_graph sg
|
||||
WHERE g.from = sg.to AND NOT is_cycle
|
||||
)
|
||||
SELECT * FROM search_graph WHERE is_cycle ORDER BY from;
|
||||
```
|
||||
|
||||
```text
|
||||
┌─from─┬─to─┬─label──┬─is_cycle─┬─path──────────────────────┐
|
||||
│ 1 │ 4 │ 1 -> 4 │ true │ [(1,4),(4,5),(5,1),(1,4)] │
|
||||
│ 4 │ 5 │ 4 -> 5 │ true │ [(4,5),(5,1),(1,4),(4,5)] │
|
||||
│ 5 │ 1 │ 5 -> 1 │ true │ [(5,1),(1,4),(4,5),(5,1)] │
|
||||
└──────┴────┴────────┴──────────┴───────────────────────────┘
|
||||
```
|
||||
|
||||
## Infinite queries
|
||||
|
||||
It is also possible to use infinite recursive CTE queries if `LIMIT` is used in outer query:
|
||||
|
||||
**Example:** Infinite recursive CTE query
|
||||
```sql
|
||||
WITH RECURSIVE test_table AS (
|
||||
SELECT 1 AS number
|
||||
UNION ALL
|
||||
SELECT number + 1 FROM test_table
|
||||
)
|
||||
SELECT sum(number) FROM (SELECT number FROM test_table LIMIT 100);
|
||||
```
|
||||
|
||||
```text
|
||||
┌─sum(number)─┐
|
||||
│ 5050 │
|
||||
└─────────────┘
|
||||
```
|
||||
|
Loading…
Reference in New Issue
Block a user