Mirror of https://github.com/ClickHouse/ClickHouse.git (synced 2024-11-27 01:51:59 +00:00)

Compare commits: 32 commits, 6570801a29 ... 3f4dd36336
Commits in this range (SHA1 only; author and date columns were empty in the extracted view):
3f4dd36336, e0f8b8d351, da2176d696, 53e0036593, 25bd73ea5e, 72d5af29e0, 44b4bd38b9, 40c7d5fd1a,
9a2a664b04, 4ccebd9a24, 99177c0daf, ad67608956, 0951991c1d, 19aec5e572, a367de9977, 6894e280b2,
39ebe113d9, 239bbaa133, 07fac5808d, ed95e0781f, 014608fb6b, a29ded4941, d2efae7511, f76df37cfa,
63621bd381, 49589da56e, 5746970ea9, 6879aa130a, 43f3c886a2, c383a743f7, 3827d90bb0, bf3a3ad607
@@ -16,16 +16,18 @@ ClickHouse works 100-1000x faster than traditional database management systems,
 For more information and documentation see https://clickhouse.com/.
 
-<!-- This is not related to the docker official library, remove it before commit to https://github.com/docker-library/docs -->
 ## Versions
 
 - The `latest` tag points to the latest release of the latest stable branch.
 - Branch tags like `22.2` point to the latest release of the corresponding branch.
-- Full version tags like `22.2.3.5` point to the corresponding release.
+- Full version tags like `22.2.3` and `22.2.3.5` point to the corresponding release.
+<!-- docker-official-library:off -->
+<!-- This is not related to the docker official library, remove it before commit to https://github.com/docker-library/docs -->
 - The tag `head` is built from the latest commit to the default branch.
 - Each tag has optional `-alpine` suffix to reflect that it's built on top of `alpine`.
 
 <!-- REMOVE UNTIL HERE -->
+<!-- docker-official-library:on -->
 ### Compatibility
 
 - The amd64 image requires support for [SSE3 instructions](https://en.wikipedia.org/wiki/SSE3). Virtually all x86 CPUs after 2005 support SSE3.
@@ -10,16 +10,18 @@ ClickHouse works 100-1000x faster than traditional database management systems,
 For more information and documentation see https://clickhouse.com/.
 
-<!-- This is not related to the docker official library, remove it before commit to https://github.com/docker-library/docs -->
 ## Versions
 
 - The `latest` tag points to the latest release of the latest stable branch.
 - Branch tags like `22.2` point to the latest release of the corresponding branch.
-- Full version tags like `22.2.3.5` point to the corresponding release.
+- Full version tags like `22.2.3` and `22.2.3.5` point to the corresponding release.
+<!-- docker-official-library:off -->
+<!-- This is not related to the docker official library, remove it before commit to https://github.com/docker-library/docs -->
 - The tag `head` is built from the latest commit to the default branch.
 - Each tag has optional `-alpine` suffix to reflect that it's built on top of `alpine`.
 
 <!-- REMOVE UNTIL HERE -->
+<!-- docker-official-library:on -->
 ### Compatibility
 
 - The amd64 image requires support for [SSE3 instructions](https://en.wikipedia.org/wiki/SSE3). Virtually all x86 CPUs after 2005 support SSE3.
@@ -522,4 +522,3 @@ sidebar_label: 2024
 * Backported in [#68518](https://github.com/ClickHouse/ClickHouse/issues/68518): Minor update in Dynamic/JSON serializations. [#68459](https://github.com/ClickHouse/ClickHouse/pull/68459) ([Kruglov Pavel](https://github.com/Avogar)).
 * Backported in [#68558](https://github.com/ClickHouse/ClickHouse/issues/68558): CI: Minor release workflow fix. [#68536](https://github.com/ClickHouse/ClickHouse/pull/68536) ([Max K.](https://github.com/maxknv)).
 * Backported in [#68576](https://github.com/ClickHouse/ClickHouse/issues/68576): CI: Tidy build timeout from 2h to 3h. [#68567](https://github.com/ClickHouse/ClickHouse/pull/68567) ([Max K.](https://github.com/maxknv)).
-
@@ -497,4 +497,3 @@ sidebar_label: 2024
 * Backported in [#69899](https://github.com/ClickHouse/ClickHouse/issues/69899): Revert "Merge pull request [#69032](https://github.com/ClickHouse/ClickHouse/issues/69032) from alexon1234/include_real_time_execution_in_http_header". [#69885](https://github.com/ClickHouse/ClickHouse/pull/69885) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
 * Backported in [#69931](https://github.com/ClickHouse/ClickHouse/issues/69931): RIPE is an acronym and thus should be capital. RIPE stands for **R**ACE **I**ntegrity **P**rimitives **E**valuation and RACE stands for **R**esearch and Development in **A**dvanced **C**ommunications **T**echnologies in **E**urope. [#69901](https://github.com/ClickHouse/ClickHouse/pull/69901) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
 * Backported in [#70034](https://github.com/ClickHouse/ClickHouse/issues/70034): Revert "Add RIPEMD160 function". [#70005](https://github.com/ClickHouse/ClickHouse/pull/70005) ([Robert Schulze](https://github.com/rschu1ze)).
-
@@ -936,4 +936,4 @@ SELECT mapPartialReverseSort((k, v) -> v, 2, map('k1', 3, 'k2', 1, 'k3', 2));
 ┌─mapPartialReverseSort(lambda(tuple(k, v), v), 2, map('k1', 3, 'k2', 1, 'k3', 2))─┐
 │ {'k1':3,'k3':2,'k2':1}                                                           │
 └──────────────────────────────────────────────────────────────────────────────────┘
 ```
@@ -49,4 +49,4 @@ LIMIT 2
 **See Also**
 
 - [DeltaLake engine](/docs/en/engines/table-engines/integrations/deltalake.md)
+- [DeltaLake cluster table function](/docs/en/sql-reference/table-functions/deltalakeCluster.md)
docs/en/sql-reference/table-functions/deltalakeCluster.md (new file, 30 lines)
@@ -0,0 +1,30 @@
---
slug: /en/sql-reference/table-functions/deltalakeCluster
sidebar_position: 46
sidebar_label: deltaLakeCluster
title: "deltaLakeCluster Table Function"
---

This is an extension to the [deltaLake](/docs/en/sql-reference/table-functions/deltalake.md) table function.

Allows processing files from [Delta Lake](https://github.com/delta-io/delta) tables in Amazon S3 in parallel from many nodes in a specified cluster. On the initiator it creates a connection to all nodes in the cluster and dispatches each file dynamically. On a worker node it asks the initiator for the next task to process and processes it. This is repeated until all tasks are finished.

**Syntax**

``` sql
deltaLakeCluster(cluster_name, url [,aws_access_key_id, aws_secret_access_key] [,format] [,structure] [,compression])
```

**Arguments**

- `cluster_name` — Name of a cluster that is used to build a set of addresses and connection parameters to remote and local servers.

- The description of all other arguments coincides with the description of the arguments in the equivalent [deltaLake](/docs/en/sql-reference/table-functions/deltalake.md) table function.

**Returned value**

A table with the specified structure for reading data from the cluster in the specified Delta Lake table in S3.

**See Also**

- [deltaLake engine](/docs/en/engines/table-engines/integrations/deltalake.md)
- [deltaLake table function](/docs/en/sql-reference/table-functions/deltalake.md)
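The new page stops short of a worked example, so here is a minimal sketch of how the function is meant to be called based on the syntax above; the cluster name, bucket URL, and credentials are hypothetical placeholders, not values from this patch:

``` sql
-- Hypothetical cluster and S3 path; substitute a cluster from your remote_servers config and your own bucket.
SELECT count()
FROM deltaLakeCluster('my_cluster', 'https://my-bucket.s3.amazonaws.com/delta_table/', 'ACCESS_KEY_ID', 'SECRET_ACCESS_KEY')
```

As the description says, the initiator distributes the table's data files across the nodes of `my_cluster`, so the scan parallelizes over the whole cluster instead of a single server.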
@@ -29,4 +29,4 @@ A table with the specified structure for reading data in the specified Hudi tabl
 **See Also**
 
 - [Hudi engine](/docs/en/engines/table-engines/integrations/hudi.md)
+- [Hudi cluster table function](/docs/en/sql-reference/table-functions/hudiCluster.md)
docs/en/sql-reference/table-functions/hudiCluster.md (new file, 30 lines)
@@ -0,0 +1,30 @@
---
slug: /en/sql-reference/table-functions/hudiCluster
sidebar_position: 86
sidebar_label: hudiCluster
title: "hudiCluster Table Function"
---

This is an extension to the [hudi](/docs/en/sql-reference/table-functions/hudi.md) table function.

Allows processing files from Apache [Hudi](https://hudi.apache.org/) tables in Amazon S3 in parallel from many nodes in a specified cluster. On the initiator it creates a connection to all nodes in the cluster and dispatches each file dynamically. On a worker node it asks the initiator for the next task to process and processes it. This is repeated until all tasks are finished.

**Syntax**

``` sql
hudiCluster(cluster_name, url [,aws_access_key_id, aws_secret_access_key] [,format] [,structure] [,compression])
```

**Arguments**

- `cluster_name` — Name of a cluster that is used to build a set of addresses and connection parameters to remote and local servers.

- The description of all other arguments coincides with the description of the arguments in the equivalent [hudi](/docs/en/sql-reference/table-functions/hudi.md) table function.

**Returned value**

A table with the specified structure for reading data from the cluster in the specified Hudi table in S3.

**See Also**

- [Hudi engine](/docs/en/engines/table-engines/integrations/hudi.md)
- [Hudi table function](/docs/en/sql-reference/table-functions/hudi.md)
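As with the Delta Lake variant above, a usage sketch may help; again, the cluster, path, and credentials are hypothetical:

``` sql
-- Hypothetical cluster and S3 path; adjust to your own deployment.
SELECT count()
FROM hudiCluster('my_cluster', 'https://my-bucket.s3.amazonaws.com/hudi_table/', 'ACCESS_KEY_ID', 'SECRET_ACCESS_KEY')
```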
@@ -72,3 +72,4 @@ Table function `iceberg` is an alias to `icebergS3` now.
 **See Also**
 
 - [Iceberg engine](/docs/en/engines/table-engines/integrations/iceberg.md)
+- [Iceberg cluster table function](/docs/en/sql-reference/table-functions/icebergCluster.md)
docs/en/sql-reference/table-functions/icebergCluster.md (new file, 43 lines)
@@ -0,0 +1,43 @@
---
slug: /en/sql-reference/table-functions/icebergCluster
sidebar_position: 91
sidebar_label: icebergCluster
title: "icebergCluster Table Function"
---

This is an extension to the [iceberg](/docs/en/sql-reference/table-functions/iceberg.md) table function.

Allows processing files from Apache [Iceberg](https://iceberg.apache.org/) in parallel from many nodes in a specified cluster. On the initiator it creates a connection to all nodes in the cluster and dispatches each file dynamically. On a worker node it asks the initiator for the next task to process and processes it. This is repeated until all tasks are finished.

**Syntax**

``` sql
icebergS3Cluster(cluster_name, url [, NOSIGN | access_key_id, secret_access_key, [session_token]] [,format] [,compression_method])
icebergS3Cluster(cluster_name, named_collection[, option=value [,..]])

icebergAzureCluster(cluster_name, connection_string|storage_account_url, container_name, blobpath, [,account_name], [,account_key] [,format] [,compression_method])
icebergAzureCluster(cluster_name, named_collection[, option=value [,..]])

icebergHDFSCluster(cluster_name, path_to_table, [,format] [,compression_method])
icebergHDFSCluster(cluster_name, named_collection[, option=value [,..]])
```

**Arguments**

- `cluster_name` — Name of a cluster that is used to build a set of addresses and connection parameters to remote and local servers.

- The description of all other arguments coincides with the description of the arguments in the equivalent [iceberg](/docs/en/sql-reference/table-functions/iceberg.md) table function.

**Returned value**

A table with the specified structure for reading data from the cluster in the specified Iceberg table.

**Examples**

```sql
SELECT * FROM icebergS3Cluster('cluster_simple', 'http://test.s3.amazonaws.com/clickhouse-bucket/test_table', 'test', 'test')
```

**See Also**

- [Iceberg engine](/docs/en/engines/table-engines/integrations/iceberg.md)
- [Iceberg table function](/docs/en/sql-reference/table-functions/iceberg.md)
@@ -476,7 +476,7 @@
 <input id="edit" type="button" value="✎" style="display: none;">
 <input id="add" type="button" value="Add chart" style="display: none;">
 <input id="reload" type="button" value="Reload">
-<span id="search-span" class="nowrap" style="display: none;"><input id="search" type="button" value="🔎" title="Run query to obtain list of charts from ClickHouse"><input id="search-query" name="search" type="text" spellcheck="false"></span>
+<span id="search-span" class="nowrap" style="display: none;"><input id="search" type="button" value="🔎" title="Run query to obtain list of charts from ClickHouse. Either select dashboard name or write your own query"><input id="search-query" name="search" list="search-options" type="text" spellcheck="false"><datalist id="search-options"></datalist></span>
 <div id="chart-params"></div>
 </div>
 </form>
@@ -532,9 +532,15 @@ const errorMessages = [
     }
 ]
 
+/// Dashboard selector
+const dashboardSearchQuery = (dashboard_name) => `SELECT title, query FROM system.dashboards WHERE dashboard = '${dashboard_name}'`;
+let dashboard_queries = {
+    "Overview": dashboardSearchQuery("Overview"),
+};
+const default_dashboard = 'Overview';
 
 /// Query to fill `queries` list for the dashboard
-let search_query = `SELECT title, query FROM system.dashboards WHERE dashboard = 'Overview'`;
+let search_query = dashboardSearchQuery(default_dashboard);
 let customized = false;
 let queries = [];
 
@@ -1439,7 +1445,7 @@ async function reloadAll(do_search) {
     try {
         updateParams();
         if (do_search) {
-            search_query = document.getElementById('search-query').value;
+            search_query = toSearchQuery(document.getElementById('search-query').value);
             queries = [];
             refreshCustomized(false);
         }
@@ -1504,7 +1510,7 @@ function updateFromState() {
     document.getElementById('url').value = host;
     document.getElementById('user').value = user;
    document.getElementById('password').value = password;
-    document.getElementById('search-query').value = search_query;
+    document.getElementById('search-query').value = fromSearchQuery(search_query);
     refreshCustomized();
 }
 
@@ -1543,6 +1549,44 @@ if (window.location.hash) {
     } catch {}
 }
 
+function fromSearchQuery(query) {
+    for (const dashboard_name in dashboard_queries) {
+        if (query == dashboard_queries[dashboard_name])
+            return dashboard_name;
+    }
+    return query;
+}
+
+function toSearchQuery(value) {
+    if (value in dashboard_queries)
+        return dashboard_queries[value];
+    else
+        return value;
+}
+
+async function populateSearchOptions() {
+    let {reply, error} = await doFetch("SELECT dashboard FROM system.dashboards GROUP BY dashboard ORDER BY ALL");
+    if (error) {
+        throw new Error(error);
+    }
+    let data = reply.data;
+    if (data.dashboard.length == 0) {
+        console.log("Unable to fetch dashboards list");
+        return;
+    }
+    dashboard_queries = {};
+    for (let i = 0; i < data.dashboard.length; i++) {
+        const dashboard = data.dashboard[i];
+        dashboard_queries[dashboard] = dashboardSearchQuery(dashboard);
+    }
+    const searchOptions = document.getElementById('search-options');
+    for (const dashboard in dashboard_queries) {
+        const opt = document.createElement('option');
+        opt.value = dashboard;
+        searchOptions.appendChild(opt);
+    }
+}
+
 async function start() {
     try {
         updateFromState();
@@ -1558,6 +1602,7 @@ async function start() {
         } else {
             drawAll();
         }
+        await populateSearchOptions();
     } catch (e) {
         showError(e.message);
     }
@@ -528,7 +528,7 @@ QueryTreeNodePtr IdentifierResolver::tryResolveIdentifierFromCompoundExpression(
   *
   * Resolve strategy:
   * 1. Try to bind identifier to scope argument name to node map.
-  * 2. If identifier is binded but expression context and node type are incompatible return nullptr.
+  * 2. If identifier is bound but expression context and node type are incompatible return nullptr.
   *
   * It is important to support edge cases, where we lookup for table or function node, but argument has same name.
   * Example: WITH (x -> x + 1) AS func, (func -> func(1) + func) AS lambda SELECT lambda(1);
@@ -135,6 +135,7 @@ add_headers_and_sources(dbms Storages/ObjectStorage/S3)
 add_headers_and_sources(dbms Storages/ObjectStorage/HDFS)
 add_headers_and_sources(dbms Storages/ObjectStorage/Local)
 add_headers_and_sources(dbms Storages/ObjectStorage/DataLakes)
+add_headers_and_sources(dbms Storages/ObjectStorage/DataLakes/Iceberg)
 add_headers_and_sources(dbms Common/NamedCollections)
 add_headers_and_sources(dbms Common/Scheduler/Workload)
 
@@ -362,7 +362,7 @@ ReplxxLineReader::ReplxxLineReader(
     if (highlighter)
         rx.set_highlighter_callback(highlighter);
 
-    /// By default C-p/C-n binded to COMPLETE_NEXT/COMPLETE_PREV,
+    /// By default C-p/C-n bound to COMPLETE_NEXT/COMPLETE_PREV,
     /// bind C-p/C-n to history-previous/history-next like readline.
     rx.bind_key(Replxx::KEY::control('N'), [this](char32_t code) { return rx.invoke(Replxx::ACTION::HISTORY_NEXT, code); });
     rx.bind_key(Replxx::KEY::control('P'), [this](char32_t code) { return rx.invoke(Replxx::ACTION::HISTORY_PREVIOUS, code); });
@@ -384,9 +384,9 @@ ReplxxLineReader::ReplxxLineReader(
     rx.bind_key(Replxx::KEY::control('J'), commit_action);
     rx.bind_key(Replxx::KEY::ENTER, commit_action);
 
-    /// By default COMPLETE_NEXT/COMPLETE_PREV was binded to C-p/C-n, re-bind
+    /// By default COMPLETE_NEXT/COMPLETE_PREV was bound to C-p/C-n, re-bind
     /// to M-P/M-N (that was used for HISTORY_COMMON_PREFIX_SEARCH before, but
-    /// it also binded to M-p/M-n).
+    /// it also bound to M-p/M-n).
     rx.bind_key(Replxx::KEY::meta('N'), [this](char32_t code) { return rx.invoke(Replxx::ACTION::COMPLETE_NEXT, code); });
     rx.bind_key(Replxx::KEY::meta('P'), [this](char32_t code) { return rx.invoke(Replxx::ACTION::COMPLETE_PREVIOUS, code); });
     /// By default M-BACKSPACE is KILL_TO_WHITESPACE_ON_LEFT, while in readline it is backward-kill-word
@@ -291,3 +291,15 @@ namespace cctz_extension
 
     ZoneInfoSourceFactory zone_info_source_factory = custom_factory;
 }
+
+DateLUTImpl::Values DateLUTImpl::lutIndexByMonthSinceEpochStartsZeroIndexing(Int32 months) const
+{
+    Int16 year = 1970 + months / 12;
+    UInt8 month = months % 12 + 1;
+    return lut[makeLUTIndex(year, month, 1)];
+}
+
+DateLUTImpl::Values DateLUTImpl::lutIndexByYearSinceEpochStartsZeroIndexing(Int16 years) const
+{
+    return lut[makeLUTIndex(years + 1970, 1, 1)];
+}
@@ -1166,6 +1166,10 @@ public:
         return LUTIndex{std::min(index, static_cast<UInt32>(DATE_LUT_SIZE - 1))};
     }
 
+    Values lutIndexByMonthSinceEpochStartsZeroIndexing(Int32 months) const;
+
+    Values lutIndexByYearSinceEpochStartsZeroIndexing(Int16 years) const;
+
     /// Create DayNum from year, month, day of month.
     ExtendedDayNum makeDayNum(Int16 year, UInt8 month, UInt8 day_of_month, Int32 default_error_day_num = 0) const
     {
@@ -62,16 +62,17 @@ public:
         for (size_t i = 0; i < num_rows; ++i)
         {
             auto array_size = col_num->getInt(i);
+            auto element_size = col_value->byteSizeAt(i);
 
             if (unlikely(array_size < 0))
                 throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE, "Array size {} cannot be negative: while executing function {}", array_size, getName());
 
             Int64 estimated_size = 0;
-            if (unlikely(common::mulOverflow(array_size, col_value->byteSize(), estimated_size)))
-                throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE, "Array size {} with element size {} bytes is too large: while executing function {}", array_size, col_value->byteSize(), getName());
+            if (unlikely(common::mulOverflow(array_size, element_size, estimated_size)))
+                throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE, "Array size {} with element size {} bytes is too large: while executing function {}", array_size, element_size, getName());
 
             if (unlikely(estimated_size > max_array_size_in_columns_bytes))
-                throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE, "Array size {} with element size {} bytes is too large: while executing function {}", array_size, col_value->byteSize(), getName());
+                throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE, "Array size {} with element size {} bytes is too large: while executing function {}", array_size, element_size, getName());
 
             offset += array_size;
 
@@ -237,6 +237,7 @@ bool IParserColumnDeclaration<NameParser>::parseImpl(Pos & pos, ASTPtr & node, E
         null_modifier.emplace(true);
     }
 
+    bool is_comment = false;
     /// Collate is also allowed after NULL/NOT NULL
     if (!collation_expression && s_collate.ignore(pos, expected)
         && !collation_parser.parse(pos, collation_expression, expected))
@@ -254,7 +255,9 @@ bool IParserColumnDeclaration<NameParser>::parseImpl(Pos & pos, ASTPtr & node, E
     else if (s_ephemeral.ignore(pos, expected))
     {
         default_specifier = s_ephemeral.getName();
-        if (!expr_parser.parse(pos, default_expression, expected) && type)
+        if (s_comment.ignore(pos, expected))
+            is_comment = true;
+        if ((is_comment || !expr_parser.parse(pos, default_expression, expected)) && type)
         {
             ephemeral_default = true;
 
@@ -289,19 +292,22 @@ bool IParserColumnDeclaration<NameParser>::parseImpl(Pos & pos, ASTPtr & node, E
     if (require_type && !type && !default_expression)
         return false; /// reject column name without type
 
-    if ((type || default_expression) && allow_null_modifiers && !null_modifier.has_value())
+    if (!is_comment)
     {
-        if (s_not.ignore(pos, expected))
+        if ((type || default_expression) && allow_null_modifiers && !null_modifier.has_value())
         {
-            if (!s_null.ignore(pos, expected))
-                return false;
-            null_modifier.emplace(false);
+            if (s_not.ignore(pos, expected))
+            {
+                if (!s_null.ignore(pos, expected))
+                    return false;
+                null_modifier.emplace(false);
+            }
+            else if (s_null.ignore(pos, expected))
+                null_modifier.emplace(true);
         }
-        else if (s_null.ignore(pos, expected))
-            null_modifier.emplace(true);
     }
 
-    if (s_comment.ignore(pos, expected))
+    if (is_comment || s_comment.ignore(pos, expected))
     {
         /// should be followed by a string literal
         if (!string_literal_parser.parse(pos, comment_expression, expected))
@@ -5,7 +5,7 @@
 #include <Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.h>
 #include <Storages/ObjectStorage/DataLakes/HudiMetadata.h>
 #include <Storages/ObjectStorage/DataLakes/IDataLakeMetadata.h>
-#include <Storages/ObjectStorage/DataLakes/IcebergMetadata.h>
+#include <Storages/ObjectStorage/DataLakes/Iceberg/IcebergMetadata.h>
 #include <Storages/ObjectStorage/HDFS/Configuration.h>
 #include <Storages/ObjectStorage/Local/Configuration.h>
 #include <Storages/ObjectStorage/S3/Configuration.h>
@@ -1,44 +1,46 @@
 #include "config.h"
 
 
 #if USE_AVRO
 
-#include <Common/logger_useful.h>
-#include <Core/Settings.h>
-#include <Columns/ColumnString.h>
-#include <Columns/ColumnTuple.h>
-#include <Columns/IColumn.h>
-#include <DataTypes/DataTypeArray.h>
-#include <DataTypes/DataTypeDate.h>
-#include <DataTypes/DataTypeDateTime64.h>
-#include <DataTypes/DataTypeFactory.h>
-#include <DataTypes/DataTypeFixedString.h>
-#include <DataTypes/DataTypeMap.h>
-#include <DataTypes/DataTypeNullable.h>
-#include <DataTypes/DataTypeString.h>
-#include <DataTypes/DataTypeTuple.h>
-#include <DataTypes/DataTypeUUID.h>
-#include <DataTypes/DataTypesDecimal.h>
-#include <DataTypes/DataTypesNumber.h>
-#include <Formats/FormatFactory.h>
-#include <IO/ReadBufferFromString.h>
-#include <IO/ReadBufferFromFileBase.h>
-#include <IO/ReadHelpers.h>
-#include <Processors/Formats/Impl/AvroRowInputFormat.h>
-#include <Storages/ObjectStorage/DataLakes/IcebergMetadata.h>
-#include <Storages/ObjectStorage/DataLakes/Common.h>
-#include <Storages/ObjectStorage/StorageObjectStorageSource.h>
-
-#include <Poco/JSON/Array.h>
-#include <Poco/JSON/Object.h>
-#include <Poco/JSON/Parser.h>
-
-#include <filesystem>
+# include <Columns/ColumnString.h>
+# include <Columns/ColumnTuple.h>
+# include <Columns/IColumn.h>
+# include <Core/Settings.h>
+# include <DataTypes/DataTypeArray.h>
+# include <DataTypes/DataTypeDate.h>
+# include <DataTypes/DataTypeDateTime64.h>
+# include <DataTypes/DataTypeFactory.h>
+# include <DataTypes/DataTypeFixedString.h>
+# include <DataTypes/DataTypeMap.h>
+# include <DataTypes/DataTypeNullable.h>
+# include <DataTypes/DataTypeString.h>
+# include <DataTypes/DataTypeTuple.h>
+# include <DataTypes/DataTypeUUID.h>
+# include <DataTypes/DataTypesDecimal.h>
+# include <DataTypes/DataTypesNumber.h>
+# include <Formats/FormatFactory.h>
+# include <IO/ReadBufferFromFileBase.h>
+# include <IO/ReadBufferFromString.h>
+# include <IO/ReadHelpers.h>
+# include <Processors/Formats/Impl/AvroRowInputFormat.h>
+# include <Storages/ObjectStorage/DataLakes/Common.h>
+# include <Storages/ObjectStorage/DataLakes/Iceberg/IcebergMetadata.h>
+# include <Storages/ObjectStorage/StorageObjectStorageSource.h>
+# include <Common/logger_useful.h>
+
+# include <Poco/JSON/Array.h>
+# include <Poco/JSON/Object.h>
+# include <Poco/JSON/Parser.h>
+
+# include <filesystem>
 
 namespace DB
 {
 namespace Setting
 {
     extern const SettingsBool iceberg_engine_ignore_schema_evolution;
 }
 
 namespace ErrorCodes
@@ -87,6 +89,7 @@ enum class DataFileContent : uint8_t
     EQUALITY_DELETES = 2,
 };
 
+
 /**
  * Iceberg supports the next data types (see https://iceberg.apache.org/spec/#schemas-and-data-types):
  * - Primitive types:
|
|||||||
}
|
}
|
||||||
|
|
||||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unexpected 'type' field: {}", type.toString());
|
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unexpected 'type' field: {}", type.toString());
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
std::pair<NamesAndTypesList, Int32> parseTableSchema(const Poco::JSON::Object::Ptr & metadata_object, int format_version, bool ignore_schema_evolution)
|
std::tuple<NamesAndTypesList, Int32, std::unordered_map<Int32, NameAndTypePair>>
|
||||||
|
parseTableSchema(const Poco::JSON::Object::Ptr & metadata_object, int format_version, bool ignore_schema_evolution)
|
||||||
{
|
{
|
||||||
Poco::JSON::Object::Ptr schema;
|
Poco::JSON::Object::Ptr schema;
|
||||||
Int32 current_schema_id;
|
Int32 current_schema_id;
|
||||||
@ -279,20 +282,24 @@ std::pair<NamesAndTypesList, Int32> parseTableSchema(const Poco::JSON::Object::P
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (!schema)
|
if (!schema)
|
||||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, R"(There is no schema with "schema-id" that matches "current-schema-id" in metadata)");
|
throw Exception(
|
||||||
|
ErrorCodes::BAD_ARGUMENTS, R"(There is no schema with "schema-id" that matches "current-schema-id" in metadata)");
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
if (schemas->size() != 1)
|
if (schemas->size() != 1)
|
||||||
throw Exception(ErrorCodes::UNSUPPORTED_METHOD,
|
throw Exception(
|
||||||
|
ErrorCodes::UNSUPPORTED_METHOD,
|
||||||
"Cannot read Iceberg table: the table schema has been changed at least 1 time, reading tables with evolved schema is "
|
"Cannot read Iceberg table: the table schema has been changed at least 1 time, reading tables with evolved schema is "
|
||||||
"supported. If you want to ignore schema evolution and read all files using latest schema saved on table creation, enable setting "
|
"supported. If you want to ignore schema evolution and read all files using latest schema saved on table creation, "
|
||||||
|
"enable setting "
|
||||||
"iceberg_engine_ignore_schema_evolution (Note: enabling this setting can lead to incorrect result)");
|
"iceberg_engine_ignore_schema_evolution (Note: enabling this setting can lead to incorrect result)");
|
||||||
|
|
||||||
/// Now we sure that there is only one schema.
|
/// Now we sure that there is only one schema.
|
||||||
schema = schemas->getObject(0);
|
schema = schemas->getObject(0);
|
||||||
if (schema->getValue<int>("schema-id") != current_schema_id)
|
if (schema->getValue<int>("schema-id") != current_schema_id)
|
||||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, R"(Field "schema-id" of the schema doesn't match "current-schema-id" in metadata)");
|
throw Exception(
|
||||||
|
ErrorCodes::BAD_ARGUMENTS, R"(Field "schema-id" of the schema doesn't match "current-schema-id" in metadata)");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
@ -301,30 +308,32 @@ std::pair<NamesAndTypesList, Int32> parseTableSchema(const Poco::JSON::Object::P
|
|||||||
current_schema_id = schema->getValue<int>("schema-id");
|
current_schema_id = schema->getValue<int>("schema-id");
|
||||||
/// Field "schemas" is optional for version 1, but after version 2 was introduced,
|
/// Field "schemas" is optional for version 1, but after version 2 was introduced,
|
||||||
/// in most cases this field is added for new tables in version 1 as well.
|
/// in most cases this field is added for new tables in version 1 as well.
|
||||||
if (!ignore_schema_evolution && metadata_object->has("schemas") && metadata_object->get("schemas").extract<Poco::JSON::Array::Ptr>()->size() > 1)
|
if (!ignore_schema_evolution && metadata_object->has("schemas")
|
||||||
throw Exception(ErrorCodes::UNSUPPORTED_METHOD,
|
&& metadata_object->get("schemas").extract<Poco::JSON::Array::Ptr>()->size() > 1§)
|
||||||
|
throw Exception(
|
||||||
|
ErrorCodes::UNSUPPORTED_METHOD,
|
||||||
"Cannot read Iceberg table: the table schema has been changed at least 1 time, reading tables with evolved schema is not "
|
"Cannot read Iceberg table: the table schema has been changed at least 1 time, reading tables with evolved schema is not "
|
||||||
"supported. If you want to ignore schema evolution and read all files using latest schema saved on table creation, enable setting "
|
"supported. If you want to ignore schema evolution and read all files using latest schema saved on table creation, enable "
|
||||||
|
"setting "
|
||||||
"iceberg_engine_ignore_schema_evolution (Note: enabling this setting can lead to incorrect result)");
|
"iceberg_engine_ignore_schema_evolution (Note: enabling this setting can lead to incorrect result)");
|
||||||
}
|
}
|
||||||
|
|
||||||
NamesAndTypesList names_and_types;
|
NamesAndTypesList names_and_types;
|
||||||
auto fields = schema->get("fields").extract<Poco::JSON::Array::Ptr>();
|
auto fields = schema->get("fields").extract<Poco::JSON::Array::Ptr>();
|
||||||
|
std::unordered_map<Int32, NameAndTypePair> name_and_type_by_source_id;
|
||||||
for (size_t i = 0; i != fields->size(); ++i)
|
for (size_t i = 0; i != fields->size(); ++i)
|
||||||
{
|
{
|
||||||
auto field = fields->getObject(static_cast<UInt32>(i));
|
auto field = fields->getObject(static_cast<UInt32>(i));
|
||||||
auto name = field->getValue<String>("name");
|
auto name = field->getValue<String>("name");
|
||||||
bool required = field->getValue<bool>("required");
|
bool required = field->getValue<bool>("required");
|
||||||
names_and_types.push_back({name, getFieldType(field, "type", required)});
|
names_and_types.push_back({name, getFieldType(field, "type", required)});
|
||||||
|
name_and_type_by_source_id[field->getValue<int>("id")] = {name, names_and_types.back().type};
|
||||||
}
|
}
|
||||||
|
|
||||||
return {std::move(names_and_types), current_schema_id};
|
return {std::move(names_and_types), current_schema_id, name_and_type_by_source_id};
|
||||||
}
|
}
|
||||||
|
|
||||||
MutableColumns parseAvro(
|
MutableColumns parseAvro(avro::DataFileReaderBase & file_reader, const Block & header, const FormatSettings & settings)
|
||||||
avro::DataFileReaderBase & file_reader,
|
|
||||||
const Block & header,
|
|
||||||
const FormatSettings & settings)
|
|
||||||
{
|
{
|
||||||
auto deserializer = std::make_unique<AvroDeserializer>(header, file_reader.dataSchema(), true, true, settings);
|
auto deserializer = std::make_unique<AvroDeserializer>(header, file_reader.dataSchema(), true, true, settings);
|
||||||
MutableColumns columns = header.cloneEmptyColumns();
|
MutableColumns columns = header.cloneEmptyColumns();
|
||||||
@ -345,17 +354,14 @@ MutableColumns parseAvro(
|
|||||||
* 1) v<V>.metadata.json, where V - metadata version.
|
* 1) v<V>.metadata.json, where V - metadata version.
|
||||||
* 2) <V>-<random-uuid>.metadata.json, where V - metadata version
|
* 2) <V>-<random-uuid>.metadata.json, where V - metadata version
|
||||||
*/
|
*/
|
||||||
std::pair<Int32, String> getMetadataFileAndVersion(
|
std::pair<Int32, String>
|
||||||
ObjectStoragePtr object_storage,
|
getMetadataFileAndVersion(ObjectStoragePtr object_storage, const StorageObjectStorage::Configuration & configuration)
|
||||||
const StorageObjectStorage::Configuration & configuration)
|
|
||||||
{
|
{
|
||||||
const auto metadata_files = listFiles(*object_storage, configuration, "metadata", ".metadata.json");
|
const auto metadata_files = listFiles(*object_storage, configuration, "metadata", ".metadata.json");
|
||||||
if (metadata_files.empty())
|
if (metadata_files.empty())
|
||||||
{
|
{
|
||||||
throw Exception(
|
throw Exception(
|
||||||
ErrorCodes::FILE_DOESNT_EXIST,
|
ErrorCodes::FILE_DOESNT_EXIST, "The metadata file for Iceberg table with path {} doesn't exist", configuration.getPath());
|
||||||
"The metadata file for Iceberg table with path {} doesn't exist",
|
|
||||||
configuration.getPath());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<std::pair<UInt32, String>> metadata_files_with_versions;
|
std::vector<std::pair<UInt32, String>> metadata_files_with_versions;
|
||||||
@ -372,7 +378,8 @@ std::pair<Int32, String> getMetadataFileAndVersion(
|
|||||||
version_str = String(file_name.begin(), file_name.begin() + file_name.find_first_of('-'));
|
version_str = String(file_name.begin(), file_name.begin() + file_name.find_first_of('-'));
|
||||||
|
|
||||||
if (!std::all_of(version_str.begin(), version_str.end(), isdigit))
|
if (!std::all_of(version_str.begin(), version_str.end(), isdigit))
|
||||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Bad metadata file name: {}. Expected vN.metadata.json where N is a number", file_name);
|
throw Exception(
|
||||||
|
ErrorCodes::BAD_ARGUMENTS, "Bad metadata file name: {}. Expected vN.metadata.json where N is a number", file_name);
|
||||||
metadata_files_with_versions.emplace_back(std::stoi(version_str), path);
|
metadata_files_with_versions.emplace_back(std::stoi(version_str), path);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -403,7 +410,7 @@ IcebergMetadata::create(ObjectStoragePtr object_storage, ConfigurationObserverPt
|
|||||||
const Poco::JSON::Object::Ptr & object = json.extract<Poco::JSON::Object::Ptr>();
|
const Poco::JSON::Object::Ptr & object = json.extract<Poco::JSON::Object::Ptr>();
|
||||||
|
|
||||||
auto format_version = object->getValue<int>("format-version");
|
auto format_version = object->getValue<int>("format-version");
|
||||||
auto [schema, schema_id]
|
auto [schema, schema_id, name_and_type_by_source_id]
|
||||||
= parseTableSchema(object, format_version, local_context->getSettingsRef()[Setting::iceberg_engine_ignore_schema_evolution]);
|
= parseTableSchema(object, format_version, local_context->getSettingsRef()[Setting::iceberg_engine_ignore_schema_evolution]);
|
||||||
|
|
||||||
auto current_snapshot_id = object->getValue<Int64>("current-snapshot-id");
|
auto current_snapshot_id = object->getValue<Int64>("current-snapshot-id");
|
||||||
@ -456,7 +463,6 @@ Strings IcebergMetadata::getDataFiles() const
|
|||||||
if (!data_files.empty())
|
if (!data_files.empty())
|
||||||
return data_files;
|
return data_files;
|
||||||
|
|
||||||
Strings manifest_files;
|
|
||||||
if (manifest_list_file.empty())
|
if (manifest_list_file.empty())
|
||||||
return data_files;
|
return data_files;
|
||||||
|
|
||||||
@ -465,7 +471,8 @@ Strings IcebergMetadata::getDataFiles() const
|
|||||||
auto context = getContext();
|
auto context = getContext();
|
||||||
StorageObjectStorageSource::ObjectInfo object_info(manifest_list_file);
|
StorageObjectStorageSource::ObjectInfo object_info(manifest_list_file);
|
||||||
auto manifest_list_buf = StorageObjectStorageSource::createReadBuffer(object_info, object_storage, context, log);
|
auto manifest_list_buf = StorageObjectStorageSource::createReadBuffer(object_info, object_storage, context, log);
|
||||||
auto manifest_list_file_reader = std::make_unique<avro::DataFileReaderBase>(std::make_unique<AvroInputStreamReadBufferAdapter>(*manifest_list_buf));
|
auto manifest_list_file_reader
|
||||||
|
= std::make_unique<avro::DataFileReaderBase>(std::make_unique<AvroInputStreamReadBufferAdapter>(*manifest_list_buf));
|
||||||
|
|
||||||
auto data_type = AvroSchemaReader::avroNodeToDataType(manifest_list_file_reader->dataSchema().root()->leafAt(0));
|
auto data_type = AvroSchemaReader::avroNodeToDataType(manifest_list_file_reader->dataSchema().root()->leafAt(0));
|
||||||
Block header{{data_type->createColumn(), data_type, "manifest_path"}};
|
Block header{{data_type->createColumn(), data_type, "manifest_path"}};
|
||||||
@ -511,7 +518,8 @@ Strings IcebergMetadata::getDataFiles() const
|
|||||||
throw Exception(
|
throw Exception(
|
||||||
ErrorCodes::UNSUPPORTED_METHOD,
|
ErrorCodes::UNSUPPORTED_METHOD,
|
||||||
"Cannot read Iceberg table: the table schema has been changed at least 1 time, reading tables with evolved schema is not "
|
"Cannot read Iceberg table: the table schema has been changed at least 1 time, reading tables with evolved schema is not "
|
||||||
"supported. If you want to ignore schema evolution and read all files using latest schema saved on table creation, enable setting "
|
"supported. If you want to ignore schema evolution and read all files using latest schema saved on table creation, enable "
|
||||||
|
"setting "
|
||||||
"iceberg_engine_ignore_schema_evolution (Note: enabling this setting can lead to incorrect result)");
|
"iceberg_engine_ignore_schema_evolution (Note: enabling this setting can lead to incorrect result)");
|
||||||
|
|
||||||
avro::NodePtr root_node = manifest_file_reader->dataSchema().root();
|
avro::NodePtr root_node = manifest_file_reader->dataSchema().root();
|
||||||
@ -520,9 +528,7 @@ Strings IcebergMetadata::getDataFiles() const
|
|||||||
if (leaves_num < expected_min_num)
|
if (leaves_num < expected_min_num)
|
||||||
{
|
{
|
||||||
throw Exception(
|
throw Exception(
|
||||||
ErrorCodes::BAD_ARGUMENTS,
|
ErrorCodes::BAD_ARGUMENTS, "Unexpected number of columns {}. Expected at least {}", root_node->leaves(), expected_min_num);
|
||||||
"Unexpected number of columns {}. Expected at least {}",
|
|
||||||
root_node->leaves(), expected_min_num);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
avro::NodePtr status_node = root_node->leafAt(0);
|
avro::NodePtr status_node = root_node->leafAt(0);
|
||||||
@ -615,7 +621,8 @@ Strings IcebergMetadata::getDataFiles() const
|
|||||||
{
|
{
|
||||||
Int32 content_type = content_int_column->getElement(i);
|
Int32 content_type = content_int_column->getElement(i);
|
||||||
if (DataFileContent(content_type) != DataFileContent::DATA)
|
if (DataFileContent(content_type) != DataFileContent::DATA)
|
||||||
throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Cannot read Iceberg table: positional and equality deletes are not supported");
|
throw Exception(
|
||||||
|
ErrorCodes::UNSUPPORTED_METHOD, "Cannot read Iceberg table: positional and equality deletes are not supported");
|
||||||
}
|
}
|
||||||
|
|
||||||
const auto status = status_int_column->getInt(i);
|
const auto status = status_int_column->getInt(i);
|
||||||
@ -635,14 +642,12 @@ Strings IcebergMetadata::getDataFiles() const
|
|||||||
{
|
{
|
||||||
LOG_TEST(log, "Processing data file for path: {}", file_path);
|
LOG_TEST(log, "Processing data file for path: {}", file_path);
|
||||||
files.insert(file_path);
|
files.insert(file_path);
|
||||||
|
data_files.push_back(file_path);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
data_files = std::vector<std::string>(files.begin(), files.end());
|
|
||||||
return data_files;
|
return data_files;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
272
src/Storages/ObjectStorage/DataLakes/Iceberg/IcebergMetadata.h
Normal file
272
src/Storages/ObjectStorage/DataLakes/Iceberg/IcebergMetadata.h
Normal file
@ -0,0 +1,272 @@
|
|||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "Core/NamesAndTypes.h"
|
||||||
|
#include "DataTypes/DataTypeNullable.h"
|
||||||
|
#include "config.h"
|
||||||
|
|
||||||
|
#if USE_AVRO /// StorageIceberg depending on Avro to parse metadata with Avro format.
|
||||||
|
|
||||||
|
# include <Core/Types.h>
|
||||||
|
# include <Disks/ObjectStorages/IObjectStorage.h>
|
||||||
|
# include <Interpreters/Context_fwd.h>
|
||||||
|
# include <Storages/ObjectStorage/DataLakes/IDataLakeMetadata.h>
|
||||||
|
# include <Storages/ObjectStorage/StorageObjectStorage.h>
|
||||||
|
|
||||||
|
namespace DB
|
||||||
|
{
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Useful links:
|
||||||
|
* - https://iceberg.apache.org/spec/
|
||||||
|
*
|
||||||
|
* Iceberg has two format versions, v1 and v2. The content of metadata files depends on the version.
|
||||||
|
*
|
||||||
|
* Unlike DeltaLake, Iceberg has several metadata layers: `table metadata`, `manifest list` and `manifest_files`.
|
||||||
|
* Metadata file - json file.
|
||||||
|
* Manifest list – an Avro file that lists manifest files; one per snapshot.
|
||||||
|
* Manifest file – an Avro file that lists data or delete files; a subset of a snapshot.
|
||||||
|
* All changes to table state create a new metadata file and replace the old metadata with an atomic swap.
|
||||||
|
*
|
||||||
|
* In order to find out which data files to read, we need to find the `manifest list`
|
||||||
|
* which corresponds to the latest snapshot. We find it by checking a list of snapshots
|
||||||
|
* in metadata's "snapshots" section.
|
||||||
|
*
|
||||||
|
* Example of metadata.json file.
|
||||||
|
* {
|
||||||
|
* "format-version" : 1,
|
||||||
|
* "table-uuid" : "ca2965ad-aae2-4813-8cf7-2c394e0c10f5",
|
||||||
|
* "location" : "/iceberg_data/db/table_name",
|
||||||
|
* "last-updated-ms" : 1680206743150,
|
||||||
|
* "last-column-id" : 2,
|
||||||
|
* "schema" : { "type" : "struct", "schema-id" : 0, "fields" : [ {<field1_info>}, {<field2_info>}, ... ] },
|
||||||
|
* "current-schema-id" : 0,
|
||||||
|
* "schemas" : [ ],
|
||||||
|
* ...
|
||||||
|
* "current-snapshot-id" : 2819310504515118887,
|
||||||
|
* "refs" : { "main" : { "snapshot-id" : 2819310504515118887, "type" : "branch" } },
|
||||||
|
* "snapshots" : [ {
|
||||||
|
* "snapshot-id" : 2819310504515118887,
|
||||||
|
* "timestamp-ms" : 1680206743150,
|
||||||
|
* "summary" : {
|
||||||
|
* "operation" : "append", "spark.app.id" : "local-1680206733239",
|
||||||
|
* "added-data-files" : "1", "added-records" : "100",
|
||||||
|
* "added-files-size" : "1070", "changed-partition-count" : "1",
|
||||||
|
* "total-records" : "100", "total-files-size" : "1070", "total-data-files" : "1", "total-delete-files" : "0",
|
||||||
|
* "total-position-deletes" : "0", "total-equality-deletes" : "0"
|
||||||
|
* },
|
||||||
|
* "manifest-list" : "/iceberg_data/db/table_name/metadata/snap-2819310504515118887-1-c87bfec7-d36c-4075-ad04-600b6b0f2020.avro",
|
||||||
|
* "schema-id" : 0
|
||||||
|
* } ],
|
||||||
|
* "statistics" : [ ],
|
||||||
|
* "snapshot-log" : [ ... ],
|
||||||
|
* "metadata-log" : [ ]
|
||||||
|
* }
|
||||||
|
*/
|
||||||
|
|
||||||
|
enum class PartitionTransform
|
||||||
|
{
|
||||||
|
Year,
|
||||||
|
Month,
|
||||||
|
Day,
|
||||||
|
Hour,
|
||||||
|
Unsupported
|
||||||
|
};
|
||||||
|
|
||||||
|
struct CommonPartitionInfo
|
||||||
|
{
|
||||||
|
std::vector<ColumnPtr> partition_columns;
|
||||||
|
std::vector<PartitionTransform> partition_transforms;
|
||||||
|
std::vector<Int32> partition_source_ids;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct SpecificSchemaPartitionInfo
|
||||||
|
{
|
||||||
|
std::vector<std::vector<Range>> ranges;
|
||||||
|
NamesAndTypesList partition_names_and_types;
|
||||||
|
};
|
||||||
|
|
||||||
|
class PartitionPruningProcessor
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
CommonPartitionInfo addCommonPartitionInfo(Poco::JSON::Array::Ptr partition_specification, const ColumnTuple * big_partition_tuple);
|
||||||
|
|
||||||
|
SpecificSchemaPartitionInfo getSpecificPartitionPruning(
|
||||||
|
const CommonPartitionInfo & common_info,
|
||||||
|
Int32 schema_version,
|
||||||
|
const std::unordered_map<Int32, NameAndTypePair> & name_and_type_by_source_id);
|
||||||
|
|
||||||
|
std::vector<bool> getPruningMask(const SpecificSchemaPartitionInfo & specific_info, const ActionsDAG * filter_dag, ContextPtr context);
|
||||||
|
|
||||||
|
std::vector<bool> getAllFilesMask(const ActionsDAG * filter_dag, ContextPtr context);
|
||||||
|
|
||||||
|
private:
|
||||||
|
static PartitionTransform getTransform(const String & transform_name)
|
||||||
|
{
|
||||||
|
if (transform_name == "year")
|
||||||
|
{
|
||||||
|
return PartitionTransform::Year;
|
||||||
|
}
|
||||||
|
else if (transform_name == "month")
|
||||||
|
{
|
||||||
|
return PartitionTransform::Month;
|
||||||
|
}
|
||||||
|
else if (transform_name == "day")
|
||||||
|
{
|
||||||
|
return PartitionTransform::Day;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
return PartitionTransform::Unsupported;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static DateLUTImpl::Values getValues(Int32 value, PartitionTransform transform)
|
||||||
|
{
|
||||||
|
if (transform == PartitionTransform::Year)
|
||||||
|
{
|
||||||
|
return DateLUT::instance().lutIndexByYearSinceEpochStartsZeroIndexing(value);
|
||||||
|
}
|
||||||
|
else if (transform == PartitionTransform::Month)
|
||||||
|
{
|
||||||
|
return DateLUT::instance().lutIndexByMonthSinceEpochStartsZeroIndexing(static_cast<UInt32>(value));
|
||||||
|
}
|
||||||
|
else if (transform == PartitionTransform::Day)
|
||||||
|
{
|
||||||
|
return DateLUT::instance().getValues(static_cast<UInt16>(value));
|
||||||
|
}
|
||||||
|
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unsupported partition transform for get day function: {}", transform);
|
||||||
|
}
|
||||||
|
|
||||||
|
static Int64 getTime(Int32 value, PartitionTransform transform)
|
||||||
|
{
|
||||||
|
DateLUTImpl::Values values = getValues(value, transform);
|
||||||
|
// LOG_DEBUG(&Poco::Logger::get("Get field"), "Values: {}", values);
|
||||||
|
return values.date;
|
||||||
|
}
|
||||||
|
|
||||||
|
static Int16 getDay(Int32 value, PartitionTransform transform)
|
||||||
|
{
|
||||||
|
DateLUTImpl::Time got_time = getTime(value, transform);
|
||||||
|
LOG_DEBUG(&Poco::Logger::get("Get field"), "Time: {}", got_time);
|
||||||
|
return DateLUT::instance().toDayNum(got_time);
|
||||||
|
}
|
||||||
|
|
||||||
|
static Range
|
||||||
|
getPartitionRange(PartitionTransform partition_transform, UInt32 index, ColumnPtr partition_column, DataTypePtr column_data_type)
|
||||||
|
{
|
||||||
|
if (partition_transform == PartitionTransform::Year || partition_transform == PartitionTransform::Month
|
||||||
|
|| partition_transform == PartitionTransform::Day)
|
||||||
|
{
|
||||||
|
auto column = dynamic_cast<const ColumnNullable *>(partition_column.get())->getNestedColumnPtr();
|
||||||
|
const auto * casted_innner_column = assert_cast<const ColumnInt32 *>(column.get());
|
||||||
|
Int32 value = static_cast<Int32>(casted_innner_column->getInt(index));
|
||||||
|
LOG_DEBUG(&Poco::Logger::get("Partition"), "Partition value: {}, transform: {}", value, partition_transform);
|
||||||
|
auto nested_data_type = dynamic_cast<const DataTypeNullable *>(column_data_type.get())->getNestedType();
|
||||||
|
if (WhichDataType(nested_data_type).isDate())
|
||||||
|
{
|
||||||
|
const UInt16 begin_range_value = getDay(value, partition_transform);
|
||||||
|
const UInt16 end_range_value = getDay(value + 1, partition_transform);
|
||||||
|
return Range{begin_range_value, true, end_range_value, false};
|
||||||
|
}
|
||||||
|
else if (WhichDataType(nested_data_type).isDateTime64())
|
||||||
|
{
|
||||||
|
const UInt64 begin_range_value = getTime(value, partition_transform);
|
||||||
|
const UInt64 end_range_value = getTime(value + 1, partition_transform);
|
||||||
|
return Range{begin_range_value, true, end_range_value, false};
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
throw Exception(
|
||||||
|
ErrorCodes::BAD_ARGUMENTS,
|
||||||
|
"Partition transform {} is not supported for the type: {}",
|
||||||
|
partition_transform,
|
||||||
|
nested_data_type);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unsupported partition transform: {}", partition_transform);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
std::unordered_map<String, CommonPartitionInfo> common_partition_info_by_manifest_file;
|
||||||
|
std::map<std::pair<String, Int32>, SpecificSchemaPartitionInfo> specific_partition_info_by_manifest_file_and_schema;
|
||||||
|
|
||||||
|
std::vector<CommonPartitionInfo> common_partition_infos;
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
class IcebergMetadata : public IDataLakeMetadata, private WithContext
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
using ConfigurationObserverPtr = StorageObjectStorage::ConfigurationObserverPtr;
|
||||||
|
|
||||||
|
static constexpr auto name = "Iceberg";
|
||||||
|
|
||||||
|
enum class PartitionTransform
|
||||||
|
{
|
||||||
|
Year,
|
||||||
|
Month,
|
||||||
|
Day,
|
||||||
|
Hour,
|
||||||
|
Unsupported
|
||||||
|
};
|
||||||
|
|
||||||
|
IcebergMetadata(
|
||||||
|
ObjectStoragePtr object_storage_,
|
||||||
|
ConfigurationObserverPtr configuration_,
|
||||||
|
ContextPtr context_,
|
||||||
|
Int32 metadata_version_,
|
||||||
|
Int32 format_version_,
|
||||||
|
String manifest_list_file_,
|
||||||
|
Int32 current_schema_id_,
|
||||||
|
NamesAndTypesList schema_);
|
||||||
|
|
||||||
|
/// Get data files. On first request it reads manifest_list file and iterates through manifest files to find all data files.
|
||||||
|
/// All subsequent calls will return saved list of files (because it cannot be changed without changing metadata file)
|
||||||
|
Strings getDataFiles() const override;
|
||||||
|
|
||||||
|
/// Get table schema parsed from metadata.
|
||||||
|
NamesAndTypesList getTableSchema() const override { return schema; }
|
||||||
|
|
||||||
|
const std::unordered_map<String, String> & getColumnNameToPhysicalNameMapping() const override { return column_name_to_physical_name; }
|
||||||
|
|
||||||
|
const DataLakePartitionColumns & getPartitionColumns() const override { return partition_columns; }
|
||||||
|
|
||||||
|
bool operator==(const IDataLakeMetadata & other) const override
|
||||||
|
{
|
||||||
|
const auto * iceberg_metadata = dynamic_cast<const IcebergMetadata *>(&other);
|
||||||
|
return iceberg_metadata && getVersion() == iceberg_metadata->getVersion();
|
||||||
|
}
|
||||||
|
|
||||||
|
static DataLakeMetadataPtr create(ObjectStoragePtr object_storage, ConfigurationObserverPtr configuration, ContextPtr local_context);
|
||||||
|
|
||||||
|
Strings makePartitionPruning(const ActionsDAG & filter_dag);
|
||||||
|
|
||||||
|
private:
|
||||||
|
size_t getVersion() const { return metadata_version; }
|
||||||
|
|
||||||
|
const ObjectStoragePtr object_storage;
|
||||||
|
const ConfigurationObserverPtr configuration;
|
||||||
|
Int32 metadata_version;
|
||||||
|
Int32 format_version;
|
||||||
|
String manifest_list_file;
|
||||||
|
Int32 current_schema_id;
|
||||||
|
NamesAndTypesList schema;
|
||||||
|
std::unordered_map<String, String> column_name_to_physical_name;
|
||||||
|
DataLakePartitionColumns partition_columns;
|
||||||
|
LoggerPtr log;
|
||||||
|
|
||||||
|
std::unordered_map<Int32, NameAndTypePair> name_and_type_by_source_id;
|
||||||
|
|
||||||
|
std::vector<std::pair<String, Int32>> manifest_files_with_start_index;
|
||||||
|
|
||||||
|
mutable Strings data_files;
|
||||||
|
mutable Strings manifest_files;
|
||||||
|
|
||||||
|
PartitionPruningProcessor pruning_processor;
|
||||||
|
};
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
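For orientation, this is the kind of query the partition-pruning path above is meant to serve: a filter over a year/month/day-transformed partition column is turned into per-file ranges (`getPartitionRange`) and checked against the filter with `KeyCondition` in `getPruningMask`, so data files whose partition ranges cannot match are skipped. A minimal sketch follows, assuming an Iceberg table partitioned by `day(event_date)`; the `s3` named collection, bucket, path and column name are illustrative placeholders, not values taken from this change.

```sql
-- Hypothetical table layout; only the shape of the WHERE clause matters for pruning.
SELECT count()
FROM icebergS3(s3, filename = 'iceberg_data/default/events/', format = Parquet, url = 'http://minio1:9001/my-bucket/')
WHERE event_date >= toDate('2024-03-01') AND event_date < toDate('2024-04-01');
```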
@ -0,0 +1,136 @@
#include <exception>
#include "Common/DateLUT.h"
#include "Core/NamesAndTypes.h"
#include "config.h"

#if USE_AVRO

# include <Columns/ColumnString.h>
# include <Columns/ColumnTuple.h>
# include <Columns/IColumn.h>
# include <Core/Settings.h>
# include <DataTypes/DataTypeArray.h>
# include <DataTypes/DataTypeDate.h>
# include <DataTypes/DataTypeDateTime64.h>
# include <DataTypes/DataTypeFactory.h>
# include <DataTypes/DataTypeFixedString.h>
# include <DataTypes/DataTypeMap.h>
# include <DataTypes/DataTypeNullable.h>
# include <DataTypes/DataTypeString.h>
# include <DataTypes/DataTypeTuple.h>
# include <DataTypes/DataTypeUUID.h>
# include <DataTypes/DataTypesDecimal.h>
# include <DataTypes/DataTypesNumber.h>
# include <Formats/FormatFactory.h>
# include <IO/ReadBufferFromFileBase.h>
# include <IO/ReadBufferFromString.h>
# include <IO/ReadHelpers.h>
# include <Processors/Formats/Impl/AvroRowInputFormat.h>
# include <Storages/ObjectStorage/DataLakes/Common.h>
# include <Storages/ObjectStorage/StorageObjectStorageSource.h>
# include <Common/logger_useful.h>


# include <Poco/JSON/Array.h>
# include <Poco/JSON/Object.h>
# include <Poco/JSON/Parser.h>

# include <DataFile.hh>

// # include <filesystem>

# include <Storages/ObjectStorage/DataLakes/Iceberg/IcebergMetadata.h>

namespace DB
{

CommonPartitionInfo
PartitionPruningProcessor::getCommonPartitionInfo(Poco::JSON::Array::Ptr partition_specification, const ColumnTuple * big_partition_tuple)
{
    CommonPartitionInfo common_info;
    for (size_t i = 0; i != partition_specification->size(); ++i)
    {
        auto current_field = partition_specification->getObject(static_cast<UInt32>(i));

        auto source_id = current_field->getValue<Int32>("source-id");
        PartitionTransform transform = getTransform(current_field->getValue<String>("transform"));

        if (transform == PartitionTransform::Unsupported)
        {
            continue;
        }
        auto partition_name = current_field->getValue<String>("name");
        LOG_DEBUG(&Poco::Logger::get("Partition Spec"), "Name: {}", partition_name);

        common_info.partition_columns.push_back(big_partition_tuple->getColumnPtr(i));
        common_info.partition_transforms.push_back(transform);
        common_info.partition_source_ids.push_back(source_id);
    }
    return common_info;
}

SpecificSchemaPartitionInfo PartitionPruningProcessor::getSpecificPartitionPruning(
    const CommonPartitionInfo & common_info,
    [[maybe_unused]] Int32 schema_version,
    const std::unordered_map<Int32, NameAndTypePair> & name_and_type_by_source_id)
{
    SpecificSchemaPartitionInfo specific_info;

    for (size_t i = 0; i < common_info.partition_columns.size(); ++i)
    {
        Int32 source_id = common_info.partition_source_ids[i];
        auto it = name_and_type_by_source_id.find(source_id);
        if (it == name_and_type_by_source_id.end())
        {
            continue;
        }
        size_t column_size = common_info.partition_columns[i]->size();
        if (specific_info.ranges.empty())
        {
            specific_info.ranges.resize(column_size);
        }
        else
        {
            assert(specific_info.ranges.size() == column_size);
        }
        NameAndTypePair name_and_type = it->second;
        specific_info.partition_names_and_types.push_back(name_and_type);
        for (size_t j = 0; j < column_size; ++j)
        {
            specific_info.ranges[j].push_back(getPartitionRange(
                common_info.partition_transforms[i], static_cast<UInt32>(j), common_info.partition_columns[i], name_and_type.type));
        }
    }
    return specific_info;
}

std::vector<bool> PartitionPruningProcessor::getPruningMask(
    const SpecificSchemaPartitionInfo & specific_info, const ActionsDAG * filter_dag, ContextPtr context)
{
    std::vector<bool> pruning_mask;
    if (!specific_info.partition_names_and_types.empty())
    {
        ExpressionActionsPtr partition_minmax_idx_expr = std::make_shared<ExpressionActions>(
            ActionsDAG(specific_info.partition_names_and_types), ExpressionActionsSettings::fromContext(context));
        const KeyCondition partition_key_condition(
            filter_dag, context, specific_info.partition_names_and_types.getNames(), partition_minmax_idx_expr);
        for (size_t j = 0; j < specific_info.ranges.size(); ++j)
        {
            if (!partition_key_condition.checkInHyperrectangle(specific_info.ranges[j], specific_info.partition_names_and_types.getTypes())
                     .can_be_true)
            {
                LOG_DEBUG(&Poco::Logger::get("Partition pruning"), "Partition pruning was successful for file: {}", j);
                pruning_mask.push_back(false);
            }
            else
            {
                LOG_DEBUG(&Poco::Logger::get("Partition pruning"), "Partition pruning failed for file: {}", j);
                pruning_mask.push_back(true);
            }
        }
    }
    return pruning_mask;
}

}

#endif
@ -1,117 +0,0 @@
#pragma once

#include "config.h"

#if USE_AVRO /// StorageIceberg depending on Avro to parse metadata with Avro format.

#include <Interpreters/Context_fwd.h>
#include <Core/Types.h>
#include <Disks/ObjectStorages/IObjectStorage.h>
#include <Storages/ObjectStorage/StorageObjectStorage.h>
#include <Storages/ObjectStorage/DataLakes/IDataLakeMetadata.h>

namespace DB
{

/**
 * Useful links:
 * - https://iceberg.apache.org/spec/
 *
 * Iceberg has two format versions, v1 and v2. The content of metadata files depends on the version.
 *
 * Unlike DeltaLake, Iceberg has several metadata layers: `table metadata`, `manifest list` and `manifest_files`.
 * Metadata file - json file.
 * Manifest list – an Avro file that lists manifest files; one per snapshot.
 * Manifest file – an Avro file that lists data or delete files; a subset of a snapshot.
 * All changes to table state create a new metadata file and replace the old metadata with an atomic swap.
 *
 * In order to find out which data files to read, we need to find the `manifest list`
 * which corresponds to the latest snapshot. We find it by checking a list of snapshots
 * in metadata's "snapshots" section.
 *
 * Example of metadata.json file.
 * {
 *     "format-version" : 1,
 *     "table-uuid" : "ca2965ad-aae2-4813-8cf7-2c394e0c10f5",
 *     "location" : "/iceberg_data/db/table_name",
 *     "last-updated-ms" : 1680206743150,
 *     "last-column-id" : 2,
 *     "schema" : { "type" : "struct", "schema-id" : 0, "fields" : [ {<field1_info>}, {<field2_info>}, ... ] },
 *     "current-schema-id" : 0,
 *     "schemas" : [ ],
 *     ...
 *     "current-snapshot-id" : 2819310504515118887,
 *     "refs" : { "main" : { "snapshot-id" : 2819310504515118887, "type" : "branch" } },
 *     "snapshots" : [ {
 *       "snapshot-id" : 2819310504515118887,
 *       "timestamp-ms" : 1680206743150,
 *       "summary" : {
 *         "operation" : "append", "spark.app.id" : "local-1680206733239",
 *         "added-data-files" : "1", "added-records" : "100",
 *         "added-files-size" : "1070", "changed-partition-count" : "1",
 *         "total-records" : "100", "total-files-size" : "1070", "total-data-files" : "1", "total-delete-files" : "0",
 *         "total-position-deletes" : "0", "total-equality-deletes" : "0"
 *       },
 *       "manifest-list" : "/iceberg_data/db/table_name/metadata/snap-2819310504515118887-1-c87bfec7-d36c-4075-ad04-600b6b0f2020.avro",
 *       "schema-id" : 0
 *     } ],
 *     "statistics" : [ ],
 *     "snapshot-log" : [ ... ],
 *     "metadata-log" : [ ]
 * }
 */
class IcebergMetadata : public IDataLakeMetadata, private WithContext
{
public:
    using ConfigurationObserverPtr = StorageObjectStorage::ConfigurationObserverPtr;

    static constexpr auto name = "Iceberg";

    IcebergMetadata(
        ObjectStoragePtr object_storage_,
        ConfigurationObserverPtr configuration_,
        ContextPtr context_,
        Int32 metadata_version_,
        Int32 format_version_,
        String manifest_list_file_,
        Int32 current_schema_id_,
        NamesAndTypesList schema_);

    /// Get data files. On first request it reads manifest_list file and iterates through manifest files to find all data files.
    /// All subsequent calls will return saved list of files (because it cannot be changed without changing metadata file)
    Strings getDataFiles() const override;

    /// Get table schema parsed from metadata.
    NamesAndTypesList getTableSchema() const override { return schema; }

    const std::unordered_map<String, String> & getColumnNameToPhysicalNameMapping() const override { return column_name_to_physical_name; }

    const DataLakePartitionColumns & getPartitionColumns() const override { return partition_columns; }

    bool operator ==(const IDataLakeMetadata & other) const override
    {
        const auto * iceberg_metadata = dynamic_cast<const IcebergMetadata *>(&other);
        return iceberg_metadata && getVersion() == iceberg_metadata->getVersion();
    }

    static DataLakeMetadataPtr create(ObjectStoragePtr object_storage, ConfigurationObserverPtr configuration, ContextPtr local_context);

private:
    size_t getVersion() const { return metadata_version; }

    const ObjectStoragePtr object_storage;
    const ConfigurationObserverPtr configuration;
    Int32 metadata_version;
    Int32 format_version;
    String manifest_list_file;
    Int32 current_schema_id;
    NamesAndTypesList schema;
    mutable Strings data_files;
    std::unordered_map<String, String> column_name_to_physical_name;
    DataLakePartitionColumns partition_columns;
    LoggerPtr log;
};

}

#endif
@ -226,6 +226,26 @@ template class TableFunctionObjectStorage<HDFSClusterDefinition, StorageHDFSConf
 #endif
 template class TableFunctionObjectStorage<LocalDefinition, StorageLocalConfiguration>;

+#if USE_AVRO && USE_AWS_S3
+template class TableFunctionObjectStorage<IcebergS3ClusterDefinition, StorageS3IcebergConfiguration>;
+#endif
+
+#if USE_AVRO && USE_AZURE_BLOB_STORAGE
+template class TableFunctionObjectStorage<IcebergAzureClusterDefinition, StorageAzureIcebergConfiguration>;
+#endif
+
+#if USE_AVRO && USE_HDFS
+template class TableFunctionObjectStorage<IcebergHDFSClusterDefinition, StorageHDFSIcebergConfiguration>;
+#endif
+
+#if USE_PARQUET && USE_AWS_S3
+template class TableFunctionObjectStorage<DeltaLakeClusterDefinition, StorageS3DeltaLakeConfiguration>;
+#endif
+
+#if USE_AWS_S3
+template class TableFunctionObjectStorage<HudiClusterDefinition, StorageS3HudiConfiguration>;
+#endif
+
 #if USE_AVRO
 void registerTableFunctionIceberg(TableFunctionFactory & factory)
 {
@ -96,7 +96,7 @@ void registerTableFunctionObjectStorageCluster(TableFunctionFactory & factory)
 {
     .documentation = {
         .description=R"(The table function can be used to read the data stored on HDFS in parallel for many nodes in a specified cluster.)",
-        .examples{{"HDFSCluster", "SELECT * FROM HDFSCluster(cluster_name, uri, format)", ""}}},
+        .examples{{"HDFSCluster", "SELECT * FROM HDFSCluster(cluster, uri, format)", ""}}},
     .allow_readonly = false
 }
 );
@ -105,15 +105,77 @@ void registerTableFunctionObjectStorageCluster(TableFunctionFactory & factory)
     UNUSED(factory);
 }

+#if USE_AVRO
+void registerTableFunctionIcebergCluster(TableFunctionFactory & factory)
+{
+    UNUSED(factory);
+
 #if USE_AWS_S3
-template class TableFunctionObjectStorageCluster<S3ClusterDefinition, StorageS3Configuration>;
+    factory.registerFunction<TableFunctionIcebergS3Cluster>(
+        {.documentation
+         = {.description = R"(The table function can be used to read the Iceberg table stored on S3 object store in parallel for many nodes in a specified cluster.)",
+            .examples{{"icebergS3Cluster", "SELECT * FROM icebergS3Cluster(cluster, url, [, NOSIGN | access_key_id, secret_access_key, [session_token]], format, [,compression])", ""}},
+            .categories{"DataLake"}},
+         .allow_readonly = false});
 #endif

 #if USE_AZURE_BLOB_STORAGE
-template class TableFunctionObjectStorageCluster<AzureClusterDefinition, StorageAzureConfiguration>;
+    factory.registerFunction<TableFunctionIcebergAzureCluster>(
+        {.documentation
+         = {.description = R"(The table function can be used to read the Iceberg table stored on Azure object store in parallel for many nodes in a specified cluster.)",
+            .examples{{"icebergAzureCluster", "SELECT * FROM icebergAzureCluster(cluster, connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression])", ""}},
+            .categories{"DataLake"}},
+         .allow_readonly = false});
 #endif

 #if USE_HDFS
-template class TableFunctionObjectStorageCluster<HDFSClusterDefinition, StorageHDFSConfiguration>;
+    factory.registerFunction<TableFunctionIcebergHDFSCluster>(
+        {.documentation
+         = {.description = R"(The table function can be used to read the Iceberg table stored on HDFS virtual filesystem in parallel for many nodes in a specified cluster.)",
+            .examples{{"icebergHDFSCluster", "SELECT * FROM icebergHDFSCluster(cluster, uri, [format], [structure], [compression_method])", ""}},
+            .categories{"DataLake"}},
+         .allow_readonly = false});
 #endif
 }
+#endif
+
+#if USE_AWS_S3
+#if USE_PARQUET
+void registerTableFunctionDeltaLakeCluster(TableFunctionFactory & factory)
+{
+    factory.registerFunction<TableFunctionDeltaLakeCluster>(
+        {.documentation
+         = {.description = R"(The table function can be used to read the DeltaLake table stored on object store in parallel for many nodes in a specified cluster.)",
+            .examples{{"deltaLakeCluster", "SELECT * FROM deltaLakeCluster(cluster, url, access_key_id, secret_access_key)", ""}},
+            .categories{"DataLake"}},
+         .allow_readonly = false});
+}
+#endif
+
+void registerTableFunctionHudiCluster(TableFunctionFactory & factory)
+{
+    factory.registerFunction<TableFunctionHudiCluster>(
+        {.documentation
+         = {.description = R"(The table function can be used to read the Hudi table stored on object store in parallel for many nodes in a specified cluster.)",
+            .examples{{"hudiCluster", "SELECT * FROM hudiCluster(cluster, url, access_key_id, secret_access_key)", ""}},
+            .categories{"DataLake"}},
+         .allow_readonly = false});
+}
+#endif
+
+void registerDataLakeClusterTableFunctions(TableFunctionFactory & factory)
+{
+    UNUSED(factory);
+#if USE_AVRO
+    registerTableFunctionIcebergCluster(factory);
+#endif
+#if USE_AWS_S3
+#if USE_PARQUET
+    registerTableFunctionDeltaLakeCluster(factory);
+#endif
+    registerTableFunctionHudiCluster(factory);
+#endif
+}

 }
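Taken together with the `.examples` strings in the registrations above, the new cluster table functions are invoked like the sketch below; the cluster name, URL and credentials are placeholders rather than values taken from this diff.

```sql
-- icebergS3Cluster: one Iceberg table on S3, with its data files fanned out across the nodes of the named cluster.
SELECT count()
FROM icebergS3Cluster('cluster_simple', 'http://minio1:9001/my-bucket/iceberg_data/default/events/', 'minio', 'minio123', Parquet);

-- deltaLakeCluster and hudiCluster follow the same pattern: the cluster name first, then the usual storage arguments.
SELECT * FROM hudiCluster('cluster_simple', 'http://minio1:9001/my-bucket/hudi/', 'minio', 'minio123') LIMIT 10;
```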
@ -33,6 +33,36 @@ struct HDFSClusterDefinition
     static constexpr auto storage_type_name = "HDFSCluster";
 };

+struct IcebergS3ClusterDefinition
+{
+    static constexpr auto name = "icebergS3Cluster";
+    static constexpr auto storage_type_name = "IcebergS3Cluster";
+};
+
+struct IcebergAzureClusterDefinition
+{
+    static constexpr auto name = "icebergAzureCluster";
+    static constexpr auto storage_type_name = "IcebergAzureCluster";
+};
+
+struct IcebergHDFSClusterDefinition
+{
+    static constexpr auto name = "icebergHDFSCluster";
+    static constexpr auto storage_type_name = "IcebergHDFSCluster";
+};
+
+struct DeltaLakeClusterDefinition
+{
+    static constexpr auto name = "deltaLakeCluster";
+    static constexpr auto storage_type_name = "DeltaLakeS3Cluster";
+};
+
+struct HudiClusterDefinition
+{
+    static constexpr auto name = "hudiCluster";
+    static constexpr auto storage_type_name = "HudiS3Cluster";
+};
+
 /**
  * Class implementing s3/hdfs/azureBlobStorageCluster(...) table functions,
  * which allow to process many files from S3/HDFS/Azure blob storage on a specific cluster.
@ -79,4 +109,25 @@ using TableFunctionAzureBlobCluster = TableFunctionObjectStorageCluster<AzureClu
 #if USE_HDFS
 using TableFunctionHDFSCluster = TableFunctionObjectStorageCluster<HDFSClusterDefinition, StorageHDFSConfiguration>;
 #endif

+#if USE_AVRO && USE_AWS_S3
+using TableFunctionIcebergS3Cluster = TableFunctionObjectStorageCluster<IcebergS3ClusterDefinition, StorageS3IcebergConfiguration>;
+#endif
+
+#if USE_AVRO && USE_AZURE_BLOB_STORAGE
+using TableFunctionIcebergAzureCluster = TableFunctionObjectStorageCluster<IcebergAzureClusterDefinition, StorageAzureIcebergConfiguration>;
+#endif
+
+#if USE_AVRO && USE_HDFS
+using TableFunctionIcebergHDFSCluster = TableFunctionObjectStorageCluster<IcebergHDFSClusterDefinition, StorageHDFSIcebergConfiguration>;
+#endif
+
+#if USE_AWS_S3 && USE_PARQUET
+using TableFunctionDeltaLakeCluster = TableFunctionObjectStorageCluster<DeltaLakeClusterDefinition, StorageS3DeltaLakeConfiguration>;
+#endif
+
+#if USE_AWS_S3
+using TableFunctionHudiCluster = TableFunctionObjectStorageCluster<HudiClusterDefinition, StorageS3HudiConfiguration>;
+#endif
+
 }
@ -66,6 +66,7 @@ void registerTableFunctions(bool use_legacy_mongodb_integration [[maybe_unused]]
     registerTableFunctionObjectStorage(factory);
     registerTableFunctionObjectStorageCluster(factory);
     registerDataLakeTableFunctions(factory);
+    registerDataLakeClusterTableFunctions(factory);
 }

 }
@ -70,6 +70,7 @@ void registerTableFunctionExplain(TableFunctionFactory & factory);
 void registerTableFunctionObjectStorage(TableFunctionFactory & factory);
 void registerTableFunctionObjectStorageCluster(TableFunctionFactory & factory);
 void registerDataLakeTableFunctions(TableFunctionFactory & factory);
+void registerDataLakeClusterTableFunctions(TableFunctionFactory & factory);

 void registerTableFunctionTimeSeries(TableFunctionFactory & factory);
@ -299,8 +299,6 @@ class TagAttrs:
     # Only one latest can exist
     latest: ClickHouseVersion
-    # Only one can be a major one (the most fresh per a year)
-    majors: Dict[int, ClickHouseVersion]
     # Only one lts version can exist
     lts: Optional[ClickHouseVersion]
@ -345,14 +343,6 @@ def ldf_tags(version: ClickHouseVersion, distro: str, tag_attrs: TagAttrs) -> st
         tags.append("lts")
         tags.append(f"lts-{distro}")

-    # If the tag `22`, `23`, `24` etc. should be included in the tags
-    with_major = tag_attrs.majors.get(version.major) in (None, version)
-    if with_major:
-        tag_attrs.majors[version.major] = version
-        if without_distro:
-            tags.append(f"{version.major}")
-        tags.append(f"{version.major}-{distro}")
-
     # Add all normal tags
     for tag in (
         f"{version.major}.{version.minor}",
@ -384,7 +374,7 @@ def generate_ldf(args: argparse.Namespace) -> None:
         args.directory / git_runner(f"git -C {args.directory} rev-parse --show-cdup")
     ).absolute()
     lines = ldf_header(git, directory)
-    tag_attrs = TagAttrs(versions[-1], {}, None)
+    tag_attrs = TagAttrs(versions[-1], None)

     # We iterate from the most recent to the oldest version
     for version in reversed(versions):
@ -378,7 +378,7 @@ def test_reload_via_client(cluster, zk):
             configure_from_zk(zk)
             break
         except QueryRuntimeException:
-            logging.exception("The new socket is not binded yet")
+            logging.exception("The new socket is not bound yet")
             time.sleep(0.1)

     if exception:
@ -0,0 +1,20 @@
<clickhouse>
    <remote_servers>
        <cluster_simple>
            <shard>
                <replica>
                    <host>node1</host>
                    <port>9000</port>
                </replica>
                <replica>
                    <host>node2</host>
                    <port>9000</port>
                </replica>
                <replica>
                    <host>node3</host>
                    <port>9000</port>
                </replica>
            </shard>
        </cluster_simple>
    </remote_servers>
</clickhouse>
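The `cluster_simple` cluster defined in the XML above is what the new cluster test below passes to the `iceberg*Cluster` functions. A quick way to confirm a node sees the cluster is to query `system.clusters`; the expected shard/replica layout follows from the configuration above, everything else depends on the environment.

```sql
-- Expect three replicas in a single shard, matching the remote_servers definition above.
SELECT cluster, shard_num, replica_num, host_name, port
FROM system.clusters
WHERE cluster = 'cluster_simple';
```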
@ -0,0 +1,6 @@
<clickhouse>
    <query_log>
        <database>system</database>
        <table>query_log</table>
    </query_log>
</clickhouse>
@ -73,14 +73,38 @@ def started_cluster():
     cluster.add_instance(
         "node1",
         main_configs=[
+            "configs/config.d/query_log.xml",
+            "configs/config.d/cluster.xml",
             "configs/config.d/named_collections.xml",
             "configs/config.d/filesystem_caches.xml",
         ],
         user_configs=["configs/users.d/users.xml"],
         with_minio=True,
         with_azurite=True,
-        stay_alive=True,
         with_hdfs=with_hdfs,
+        stay_alive=True,
+    )
+    cluster.add_instance(
+        "node2",
+        main_configs=[
+            "configs/config.d/query_log.xml",
+            "configs/config.d/cluster.xml",
+            "configs/config.d/named_collections.xml",
+            "configs/config.d/filesystem_caches.xml",
+        ],
+        user_configs=["configs/users.d/users.xml"],
+        stay_alive=True,
+    )
+    cluster.add_instance(
+        "node3",
+        main_configs=[
+            "configs/config.d/query_log.xml",
+            "configs/config.d/cluster.xml",
+            "configs/config.d/named_collections.xml",
+            "configs/config.d/filesystem_caches.xml",
+        ],
+        user_configs=["configs/users.d/users.xml"],
+        stay_alive=True,
     )

     logging.info("Starting cluster...")
@ -182,6 +206,7 @@ def get_creation_expression(
     cluster,
     format="Parquet",
     table_function=False,
+    run_on_cluster=False,
     **kwargs,
 ):
     if storage_type == "s3":
@ -189,35 +214,56 @@ def get_creation_expression(
             bucket = kwargs["bucket"]
         else:
             bucket = cluster.minio_bucket
-        print(bucket)
-        if table_function:
-            return f"icebergS3(s3, filename = 'iceberg_data/default/{table_name}/', format={format}, url = 'http://minio1:9001/{bucket}/')"
+        if run_on_cluster:
+            assert table_function
+            return f"icebergS3Cluster('cluster_simple', s3, filename = 'iceberg_data/default/{table_name}/', format={format}, url = 'http://minio1:9001/{bucket}/')"
         else:
-            return f"""
-            DROP TABLE IF EXISTS {table_name};
-            CREATE TABLE {table_name}
-            ENGINE=IcebergS3(s3, filename = 'iceberg_data/default/{table_name}/', format={format}, url = 'http://minio1:9001/{bucket}/')"""
+            if table_function:
+                return f"icebergS3(s3, filename = 'iceberg_data/default/{table_name}/', format={format}, url = 'http://minio1:9001/{bucket}/')"
+            else:
+                return f"""
+                DROP TABLE IF EXISTS {table_name};
+                CREATE TABLE {table_name}
+                ENGINE=IcebergS3(s3, filename = 'iceberg_data/default/{table_name}/', format={format}, url = 'http://minio1:9001/{bucket}/')"""

     elif storage_type == "azure":
-        if table_function:
+        if run_on_cluster:
+            assert table_function
             return f"""
-            icebergAzure(azure, container = '{cluster.azure_container_name}', storage_account_url = '{cluster.env_variables["AZURITE_STORAGE_ACCOUNT_URL"]}', blob_path = '/iceberg_data/default/{table_name}/', format={format})
+            icebergAzureCluster('cluster_simple', azure, container = '{cluster.azure_container_name}', storage_account_url = '{cluster.env_variables["AZURITE_STORAGE_ACCOUNT_URL"]}', blob_path = '/iceberg_data/default/{table_name}/', format={format})
             """
         else:
-            return f"""
-            DROP TABLE IF EXISTS {table_name};
-            CREATE TABLE {table_name}
-            ENGINE=IcebergAzure(azure, container = {cluster.azure_container_name}, storage_account_url = '{cluster.env_variables["AZURITE_STORAGE_ACCOUNT_URL"]}', blob_path = '/iceberg_data/default/{table_name}/', format={format})"""
+            if table_function:
+                return f"""
+                icebergAzure(azure, container = '{cluster.azure_container_name}', storage_account_url = '{cluster.env_variables["AZURITE_STORAGE_ACCOUNT_URL"]}', blob_path = '/iceberg_data/default/{table_name}/', format={format})
+                """
+            else:
+                return f"""
+                DROP TABLE IF EXISTS {table_name};
+                CREATE TABLE {table_name}
+                ENGINE=IcebergAzure(azure, container = {cluster.azure_container_name}, storage_account_url = '{cluster.env_variables["AZURITE_STORAGE_ACCOUNT_URL"]}', blob_path = '/iceberg_data/default/{table_name}/', format={format})"""

     elif storage_type == "hdfs":
-        if table_function:
+        if run_on_cluster:
+            assert table_function
             return f"""
-            icebergHDFS(hdfs, filename= 'iceberg_data/default/{table_name}/', format={format}, url = 'hdfs://hdfs1:9000/')
+            icebergHDFSCluster('cluster_simple', hdfs, filename= 'iceberg_data/default/{table_name}/', format={format}, url = 'hdfs://hdfs1:9000/')
             """
         else:
-            return f"""
-            DROP TABLE IF EXISTS {table_name};
-            CREATE TABLE {table_name}
-            ENGINE=IcebergHDFS(hdfs, filename = 'iceberg_data/default/{table_name}/', format={format}, url = 'hdfs://hdfs1:9000/');"""
+            if table_function:
+                return f"""
+                icebergHDFS(hdfs, filename= 'iceberg_data/default/{table_name}/', format={format}, url = 'hdfs://hdfs1:9000/')
+                """
+            else:
+                return f"""
+                DROP TABLE IF EXISTS {table_name};
+                CREATE TABLE {table_name}
+                ENGINE=IcebergHDFS(hdfs, filename = 'iceberg_data/default/{table_name}/', format={format}, url = 'hdfs://hdfs1:9000/');"""

     elif storage_type == "local":
+        assert not run_on_cluster
+
         if table_function:
             return f"""
             icebergLocal(local, path = '/iceberg_data/default/{table_name}/', format={format})
@ -227,6 +273,7 @@ def get_creation_expression(
             DROP TABLE IF EXISTS {table_name};
             CREATE TABLE {table_name}
             ENGINE=IcebergLocal(local, path = '/iceberg_data/default/{table_name}/', format={format});"""

     else:
         raise Exception(f"Unknown iceberg storage type: {storage_type}")
@ -492,6 +539,108 @@ def test_types(started_cluster, format_version, storage_type):
     )


+@pytest.mark.parametrize("format_version", ["1", "2"])
+@pytest.mark.parametrize("storage_type", ["s3", "azure", "hdfs"])
+def test_cluster_table_function(started_cluster, format_version, storage_type):
+    if is_arm() and storage_type == "hdfs":
+        pytest.skip("Disabled test IcebergHDFS for aarch64")
+
+    instance = started_cluster.instances["node1"]
+    spark = started_cluster.spark_session
+
+    TABLE_NAME = (
+        "test_iceberg_cluster_"
+        + format_version
+        + "_"
+        + storage_type
+        + "_"
+        + get_uuid_str()
+    )
+
+    def add_df(mode):
+        write_iceberg_from_df(
+            spark,
+            generate_data(spark, 0, 100),
+            TABLE_NAME,
+            mode=mode,
+            format_version=format_version,
+        )
+
+        files = default_upload_directory(
+            started_cluster,
+            storage_type,
+            f"/iceberg_data/default/{TABLE_NAME}/",
+            f"/iceberg_data/default/{TABLE_NAME}/",
+        )
+
+        logging.info(f"Adding another dataframe. result files: {files}")
+
+        return files
+
+    files = add_df(mode="overwrite")
+    for i in range(1, len(started_cluster.instances)):
+        files = add_df(mode="append")
+
+    logging.info(f"Setup complete. files: {files}")
+    assert len(files) == 5 + 4 * (len(started_cluster.instances) - 1)
+
+    clusters = instance.query(f"SELECT * FROM system.clusters")
+    logging.info(f"Clusters setup: {clusters}")
+
+    # Regular Query only node1
+    table_function_expr = get_creation_expression(
+        storage_type, TABLE_NAME, started_cluster, table_function=True
+    )
+    select_regular = (
+        instance.query(f"SELECT * FROM {table_function_expr}").strip().split()
+    )
+
+    # Cluster Query with node1 as coordinator
+    table_function_expr_cluster = get_creation_expression(
+        storage_type,
+        TABLE_NAME,
+        started_cluster,
+        table_function=True,
+        run_on_cluster=True,
+    )
+    select_cluster = (
+        instance.query(f"SELECT * FROM {table_function_expr_cluster}").strip().split()
+    )
+
+    # Simple size check
+    assert len(select_regular) == 600
+    assert len(select_cluster) == 600
+
+    # Actual check
+    assert select_cluster == select_regular
+
+    # Check query_log
+    for replica in started_cluster.instances.values():
+        replica.query("SYSTEM FLUSH LOGS")
+
+    for node_name, replica in started_cluster.instances.items():
+        cluster_secondary_queries = (
+            replica.query(
+                f"""
+                SELECT query, type, is_initial_query, read_rows, read_bytes FROM system.query_log
+                WHERE
+                    type = 'QueryStart' AND
+                    positionCaseInsensitive(query, '{storage_type}Cluster') != 0 AND
+                    position(query, '{TABLE_NAME}') != 0 AND
+                    position(query, 'system.query_log') = 0 AND
+                    NOT is_initial_query
+                """
+            )
+            .strip()
+            .split("\n")
+        )
+
+        logging.info(
+            f"[{node_name}] cluster_secondary_queries: {cluster_secondary_queries}"
+        )
+        assert len(cluster_secondary_queries) == 1
+
+
 @pytest.mark.parametrize("format_version", ["1", "2"])
 @pytest.mark.parametrize("storage_type", ["s3", "azure", "hdfs", "local"])
 def test_delete_files(started_cluster, format_version, storage_type):
@ -1,3 +1,6 @@
 SELECT arrayWithConstant(96142475, ['qMUF']); -- { serverError TOO_LARGE_ARRAY_SIZE }
 SELECT arrayWithConstant(100000000, materialize([[[[[[[[[['Hello, world!']]]]]]]]]])); -- { serverError TOO_LARGE_ARRAY_SIZE }
 SELECT length(arrayWithConstant(10000000, materialize([[[[[[[[[['Hello world']]]]]]]]]])));
+
+CREATE TEMPORARY TABLE args (value Array(Int)) ENGINE=Memory AS SELECT [1, 1, 1, 1] as value FROM numbers(1, 100);
+SELECT length(arrayWithConstant(1000000, value)) FROM args FORMAT NULL;
tests/queries/0_stateless/03250_ephemeral_comment.sql
Normal file
11
tests/queries/0_stateless/03250_ephemeral_comment.sql
Normal file
@ -0,0 +1,11 @@
|
drop table if exists test;
CREATE TABLE test (
    `start_s` UInt32 EPHEMERAL COMMENT 'start UNIX time' ,
    `start_us` UInt16 EPHEMERAL COMMENT 'start microseconds',
    `finish_s` UInt32 EPHEMERAL COMMENT 'finish UNIX time',
    `finish_us` UInt16 EPHEMERAL COMMENT 'finish microseconds',
    `captured` DateTime MATERIALIZED fromUnixTimestamp(start_s),
    `duration` Decimal32(6) MATERIALIZED finish_s - start_s + (finish_us - start_us)/1000000
)
ENGINE Null;
drop table if exists test;
@ -244,7 +244,10 @@ Deduplication
 DefaultTableEngine
 DelayedInserts
 DeliveryTag
+Deltalake
 DeltaLake
+deltalakeCluster
+deltaLakeCluster
 Denormalize
 DestroyAggregatesThreads
 DestroyAggregatesThreadsActive
@ -377,10 +380,15 @@ Homebrew's
 HorizontalDivide
 Hostname
 HouseOps
+hudi
 Hudi
+hudiCluster
+HudiCluster
 HyperLogLog
 Hypot
 IANA
+icebergCluster
+IcebergCluster
 IDE
 IDEs
 IDNA