mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-23 08:02:02 +00:00
Add DISTINCT to INTERSECT and EXCEPT
This commit is contained in:
parent
b987ed5037
commit
33770303a0
@ -23,7 +23,9 @@ FROM table2
|
||||
```
|
||||
The condition could be any expression based on your requirements.
|
||||
|
||||
**Examples**
|
||||
## Examples
|
||||
|
||||
Here is a simple example that returns the numbers 1 to 10 that are _not_ a part of the numbers 3 to 8:
|
||||
|
||||
Query:
|
||||
|
||||
@ -33,7 +35,7 @@ SELECT number FROM numbers(1,10) EXCEPT SELECT number FROM numbers(3,6);
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
```response
|
||||
┌─number─┐
|
||||
│ 1 │
|
||||
│ 2 │
|
||||
@ -42,28 +44,109 @@ Result:
|
||||
└────────┘
|
||||
```
|
||||
|
||||
Query:
|
||||
`EXCEPT` and `INTERSECT` can often be used interchangeably with different Boolean logic, and they are both useful if you have two tables that share a common column (or columns). For example, suppose we have a few million rows of historical cryptocurrency data that contains trade prices and volume:
|
||||
|
||||
``` sql
|
||||
CREATE TABLE t1(one String, two String, three String) ENGINE=Memory();
|
||||
CREATE TABLE t2(four String, five String, six String) ENGINE=Memory();
|
||||
```sql
|
||||
CREATE TABLE crypto_prices
|
||||
(
|
||||
trade_date Date,
|
||||
crypto_name String,
|
||||
volume Float32,
|
||||
price Float32,
|
||||
market_cap Float32,
|
||||
change_1_day Float32
|
||||
)
|
||||
ENGINE = MergeTree
|
||||
PRIMARY KEY (crypto_name, trade_date);
|
||||
|
||||
INSERT INTO t1 VALUES ('q', 'm', 'b'), ('s', 'd', 'f'), ('l', 'p', 'o'), ('s', 'd', 'f'), ('s', 'd', 'f'), ('k', 't', 'd'), ('l', 'p', 'o');
|
||||
INSERT INTO t2 VALUES ('q', 'm', 'b'), ('b', 'd', 'k'), ('s', 'y', 't'), ('s', 'd', 'f'), ('m', 'f', 'o'), ('k', 'k', 'd');
|
||||
INSERT INTO crypto_prices
|
||||
SELECT *
|
||||
FROM s3(
|
||||
'https://learn-clickhouse.s3.us-east-2.amazonaws.com/crypto_prices.csv',
|
||||
'CSVWithNames'
|
||||
);
|
||||
|
||||
SELECT * FROM t1 EXCEPT SELECT * FROM t2;
|
||||
SELECT * FROM crypto_prices
|
||||
WHERE crypto_name = 'Bitcoin'
|
||||
ORDER BY trade_date DESC
|
||||
LIMIT 10;
|
||||
```
|
||||
|
||||
```response
|
||||
┌─trade_date─┬─crypto_name─┬──────volume─┬────price─┬───market_cap─┬──change_1_day─┐
|
||||
│ 2020-11-02 │ Bitcoin │ 30771456000 │ 13550.49 │ 251119860000 │ -0.013585099 │
|
||||
│ 2020-11-01 │ Bitcoin │ 24453857000 │ 13737.11 │ 254569760000 │ -0.0031840964 │
|
||||
│ 2020-10-31 │ Bitcoin │ 30306464000 │ 13780.99 │ 255372070000 │ 0.017308505 │
|
||||
│ 2020-10-30 │ Bitcoin │ 30581486000 │ 13546.52 │ 251018150000 │ 0.008084608 │
|
||||
│ 2020-10-29 │ Bitcoin │ 56499500000 │ 13437.88 │ 248995320000 │ 0.012552661 │
|
||||
│ 2020-10-28 │ Bitcoin │ 35867320000 │ 13271.29 │ 245899820000 │ -0.02804481 │
|
||||
│ 2020-10-27 │ Bitcoin │ 33749879000 │ 13654.22 │ 252985950000 │ 0.04427984 │
|
||||
│ 2020-10-26 │ Bitcoin │ 29461459000 │ 13075.25 │ 242251000000 │ 0.0033826586 │
|
||||
│ 2020-10-25 │ Bitcoin │ 24406921000 │ 13031.17 │ 241425220000 │ -0.0058658565 │
|
||||
│ 2020-10-24 │ Bitcoin │ 24542319000 │ 13108.06 │ 242839880000 │ 0.013650347 │
|
||||
└────────────┴─────────────┴─────────────┴──────────┴──────────────┴───────────────┘
|
||||
```
|
||||
|
||||
Now suppose we have a table named `holdings` that contains a list of cryptocurrencies that we own, along with the number of coins:
|
||||
|
||||
```sql
|
||||
CREATE TABLE holdings
|
||||
(
|
||||
crypto_name String,
|
||||
quantity UInt64
|
||||
)
|
||||
ENGINE = MergeTree
|
||||
PRIMARY KEY (crypto_name);
|
||||
|
||||
INSERT INTO holdings VALUES
|
||||
('Bitcoin', 1000),
|
||||
('Bitcoin', 200),
|
||||
('Ethereum', 250),
|
||||
('Ethereum', 5000),
|
||||
('DOGEFI', 10);
|
||||
('Bitcoin Diamond', 5000);
|
||||
```
|
||||
|
||||
We can use `EXCEPT` to answer a question like **"Which coins do we own have never traded below $10?"**:
|
||||
|
||||
```sql
|
||||
SELECT crypto_name FROM holdings
|
||||
EXCEPT
|
||||
SELECT crypto_name FROM crypto_prices
|
||||
WHERE price < 10;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─one─┬─two─┬─three─┐
|
||||
│ l │ p │ o │
|
||||
│ k │ t │ d │
|
||||
│ l │ p │ o │
|
||||
└─────┴─────┴───────┘
|
||||
```response
|
||||
┌─crypto_name─┐
|
||||
│ Bitcoin │
|
||||
│ Bitcoin │
|
||||
└─────────────┘
|
||||
```
|
||||
|
||||
This means of the four cryptocurrencies we own, only Bitcoin has never dropped below $10 (based on the limited data we have here in this example).
|
||||
|
||||
## EXCEPT DISTINCT
|
||||
|
||||
Notice in the previous query we had multiple Bitcoin holdings in the result. You can add `DISTINCT` to `EXCEPT` to eliminate duplicate rows from the result:
|
||||
|
||||
```sql
|
||||
SELECT crypto_name FROM holdings
|
||||
EXCEPT DISTINCT
|
||||
SELECT crypto_name FROM crypto_prices
|
||||
WHERE price < 10;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```response
|
||||
┌─crypto_name─┐
|
||||
│ Bitcoin │
|
||||
└─────────────┘
|
||||
```
|
||||
|
||||
|
||||
**See Also**
|
||||
|
||||
- [UNION](union.md#union-clause)
|
||||
|
@ -24,17 +24,17 @@ FROM table2
|
||||
```
|
||||
The condition could be any expression based on your requirements.
|
||||
|
||||
**Examples**
|
||||
## Examples
|
||||
|
||||
Query:
|
||||
Here is a simple example that intersects the numbers 1 to 10 with the numbers 3 to 8:
|
||||
|
||||
``` sql
|
||||
```sql
|
||||
SELECT number FROM numbers(1,10) INTERSECT SELECT number FROM numbers(3,6);
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
```response
|
||||
┌─number─┐
|
||||
│ 3 │
|
||||
│ 4 │
|
||||
@ -45,29 +45,112 @@ Result:
|
||||
└────────┘
|
||||
```
|
||||
|
||||
Query:
|
||||
`INTERSECT` is useful if you have two tables that share a common column (or columns). You can intersect the results of two queries, as long as the results contain the same columns. For example, suppose we have a few million rows of historical cryptocurrency data that contains trade prices and volume:
|
||||
|
||||
``` sql
|
||||
CREATE TABLE t1(one String, two String, three String) ENGINE=Memory();
|
||||
CREATE TABLE t2(four String, five String, six String) ENGINE=Memory();
|
||||
```sql
|
||||
CREATE TABLE crypto_prices
|
||||
(
|
||||
trade_date Date,
|
||||
crypto_name String,
|
||||
volume Float32,
|
||||
price Float32,
|
||||
market_cap Float32,
|
||||
change_1_day Float32
|
||||
)
|
||||
ENGINE = MergeTree
|
||||
PRIMARY KEY (crypto_name, trade_date);
|
||||
|
||||
INSERT INTO t1 VALUES ('q', 'm', 'b'), ('s', 'd', 'f'), ('l', 'p', 'o'), ('s', 'd', 'f'), ('s', 'd', 'f'), ('k', 't', 'd'), ('l', 'p', 'o');
|
||||
INSERT INTO t2 VALUES ('q', 'm', 'b'), ('b', 'd', 'k'), ('s', 'y', 't'), ('s', 'd', 'f'), ('m', 'f', 'o'), ('k', 'k', 'd');
|
||||
INSERT INTO crypto_prices
|
||||
SELECT *
|
||||
FROM s3(
|
||||
'https://learn-clickhouse.s3.us-east-2.amazonaws.com/crypto_prices.csv',
|
||||
'CSVWithNames'
|
||||
);
|
||||
|
||||
SELECT * FROM t1 INTERSECT SELECT * FROM t2;
|
||||
SELECT * FROM crypto_prices
|
||||
WHERE crypto_name = 'Bitcoin'
|
||||
ORDER BY trade_date DESC
|
||||
LIMIT 10;
|
||||
```
|
||||
|
||||
```response
|
||||
┌─trade_date─┬─crypto_name─┬──────volume─┬────price─┬───market_cap─┬──change_1_day─┐
|
||||
│ 2020-11-02 │ Bitcoin │ 30771456000 │ 13550.49 │ 251119860000 │ -0.013585099 │
|
||||
│ 2020-11-01 │ Bitcoin │ 24453857000 │ 13737.11 │ 254569760000 │ -0.0031840964 │
|
||||
│ 2020-10-31 │ Bitcoin │ 30306464000 │ 13780.99 │ 255372070000 │ 0.017308505 │
|
||||
│ 2020-10-30 │ Bitcoin │ 30581486000 │ 13546.52 │ 251018150000 │ 0.008084608 │
|
||||
│ 2020-10-29 │ Bitcoin │ 56499500000 │ 13437.88 │ 248995320000 │ 0.012552661 │
|
||||
│ 2020-10-28 │ Bitcoin │ 35867320000 │ 13271.29 │ 245899820000 │ -0.02804481 │
|
||||
│ 2020-10-27 │ Bitcoin │ 33749879000 │ 13654.22 │ 252985950000 │ 0.04427984 │
|
||||
│ 2020-10-26 │ Bitcoin │ 29461459000 │ 13075.25 │ 242251000000 │ 0.0033826586 │
|
||||
│ 2020-10-25 │ Bitcoin │ 24406921000 │ 13031.17 │ 241425220000 │ -0.0058658565 │
|
||||
│ 2020-10-24 │ Bitcoin │ 24542319000 │ 13108.06 │ 242839880000 │ 0.013650347 │
|
||||
└────────────┴─────────────┴─────────────┴──────────┴──────────────┴───────────────┘
|
||||
```
|
||||
|
||||
Now suppose we have a table named `holdings` that contains a list of cryptocurrencies that we own, along with the number of coins:
|
||||
|
||||
```sql
|
||||
CREATE TABLE holdings
|
||||
(
|
||||
crypto_name String,
|
||||
quantity UInt64
|
||||
)
|
||||
ENGINE = MergeTree
|
||||
PRIMARY KEY (crypto_name);
|
||||
|
||||
INSERT INTO holdings VALUES
|
||||
('Bitcoin', 1000),
|
||||
('Bitcoin', 200),
|
||||
('Ethereum', 250),
|
||||
('Ethereum', 5000),
|
||||
('DOGEFI', 10);
|
||||
('Bitcoin Diamond', 5000);
|
||||
```
|
||||
|
||||
We can use `INTERSECT` to answer questions like **"Which coins do we own have traded at a price greater than $100?"**:
|
||||
|
||||
```sql
|
||||
SELECT crypto_name FROM holdings
|
||||
INTERSECT
|
||||
SELECT crypto_name FROM crypto_prices
|
||||
WHERE price > 100
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─one─┬─two─┬─three─┐
|
||||
│ q │ m │ b │
|
||||
│ s │ d │ f │
|
||||
│ s │ d │ f │
|
||||
│ s │ d │ f │
|
||||
└─────┴─────┴───────┘
|
||||
```response
|
||||
┌─crypto_name─┐
|
||||
│ Bitcoin │
|
||||
│ Bitcoin │
|
||||
│ Ethereum │
|
||||
│ Ethereum │
|
||||
└─────────────┘
|
||||
```
|
||||
|
||||
This means at some point in time, Bitcoin and Ethereum traded above $100, and DOGEFI and Bitcoin Diamond have never traded above $100 (at least using the data we have here in this example).
|
||||
|
||||
## INTERSECT DISTINCT
|
||||
|
||||
Notice in the previous query we had multiple Bitcoin and Ethereum holdings that traded above $100. It might be nice to remove duplicate rows (since they only repeat what we already know). You can add `DISTINCT` to `INTERSECT` to eliminate duplicate rows from the result:
|
||||
|
||||
```sql
|
||||
SELECT crypto_name FROM holdings
|
||||
INTERSECT DISTINCT
|
||||
SELECT crypto_name FROM crypto_prices
|
||||
WHERE price > 100;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```response
|
||||
┌─crypto_name─┐
|
||||
│ Bitcoin │
|
||||
│ Ethereum │
|
||||
└─────────────┘
|
||||
```
|
||||
|
||||
|
||||
**See Also**
|
||||
|
||||
- [UNION](union.md#union-clause)
|
||||
|
Loading…
Reference in New Issue
Block a user