diff --git a/src/TableFunctions/TableFunctionObjectStorage.cpp b/src/TableFunctions/TableFunctionObjectStorage.cpp
index 12de08afad0..9917b9438d5 100644
--- a/src/TableFunctions/TableFunctionObjectStorage.cpp
+++ b/src/TableFunctions/TableFunctionObjectStorage.cpp
@@ -226,6 +226,27 @@ template class TableFunctionObjectStorage<LocalDefinition, StorageLocalConfiguration>;
 
+#if USE_AVRO && USE_AWS_S3
+template class TableFunctionObjectStorage<IcebergDefinition, StorageS3IcebergConfiguration>;
+template class TableFunctionObjectStorage<IcebergS3Definition, StorageS3IcebergConfiguration>;
+#endif
+
+#if USE_AVRO && USE_AZURE_BLOB_STORAGE
+template class TableFunctionObjectStorage<IcebergAzureDefinition, StorageAzureIcebergConfiguration>;
+#endif
+
+#if USE_AVRO && USE_HDFS
+template class TableFunctionObjectStorage<IcebergHDFSDefinition, StorageHDFSIcebergConfiguration>;
+#endif
+
+#if USE_AWS_S3 && USE_PARQUET
+template class TableFunctionObjectStorage<DeltaLakeDefinition, StorageS3DeltaLakeConfiguration>;
+#endif
+
+#if USE_AWS_S3
+template class TableFunctionObjectStorage<HudiDefinition, StorageS3HudiConfiguration>;
+#endif
+
 #if USE_AVRO
 void registerTableFunctionIceberg(TableFunctionFactory & factory)
 {
diff --git a/src/TableFunctions/TableFunctionObjectStorageCluster.cpp b/src/TableFunctions/TableFunctionObjectStorageCluster.cpp
index 5ca26aabe32..19f7a23af4d 100644
--- a/src/TableFunctions/TableFunctionObjectStorageCluster.cpp
+++ b/src/TableFunctions/TableFunctionObjectStorageCluster.cpp
@@ -105,15 +105,79 @@ void registerTableFunctionObjectStorageCluster(TableFunctionFactory & factory)
     UNUSED(factory);
 }
 
-#if USE_AWS_S3
-template class TableFunctionObjectStorageCluster<S3ClusterDefinition, StorageS3Configuration>;
+
+void registerTableFunctionIcebergCluster(TableFunctionFactory & factory)
+{
+    UNUSED(factory);
+
+#if USE_AVRO && USE_AWS_S3
+    factory.registerFunction<TableFunctionIcebergCluster>(
+        {.documentation
+         = {.description = R"(The table function can be used to read the Iceberg table stored on S3 object store in parallel for many nodes in a specified cluster. 
+Alias to icebergS3)",
+            .examples{{"icebergCluster", "SELECT * FROM icebergCluster(cluster_name, url, access_key_id, secret_access_key)", ""}},
+            .categories{"DataLake"}},
+         .allow_readonly = false});
+
+    factory.registerFunction<TableFunctionIcebergS3Cluster>(
+        {.documentation
+         = {.description = R"(The table function can be used to read the Iceberg table stored on S3 object store in parallel for many nodes in a specified cluster.)",
+            .examples{{"icebergS3Cluster", "SELECT * FROM icebergS3Cluster(cluster_name, url, access_key_id, secret_access_key)", ""}},
+            .categories{"DataLake"}},
+         .allow_readonly = false});
 #endif
 
-#if USE_AZURE_BLOB_STORAGE
-template class TableFunctionObjectStorageCluster<AzureClusterDefinition, StorageAzureConfiguration>;
+#if USE_AVRO && USE_AZURE_BLOB_STORAGE
+    factory.registerFunction<TableFunctionIcebergAzureCluster>(
+        {.documentation
+         = {.description = R"(The table function can be used to read the Iceberg table stored on Azure object store in parallel for many nodes in a specified cluster.)",
+            .examples{{"icebergAzureCluster", "SELECT * FROM icebergAzureCluster(url, access_key_id, secret_access_key)", ""}},
+            .categories{"DataLake"}},
+         .allow_readonly = false});
 #endif
 
-#if USE_HDFS
-template class TableFunctionObjectStorageCluster<HDFSClusterDefinition, StorageHDFSConfiguration>;
+#if USE_AVRO && USE_HDFS
+    factory.registerFunction<TableFunctionIcebergHDFSCluster>(
+        {.documentation
+         = {.description = R"(The table function can be used to read the Iceberg table stored on HDFS virtual filesystem in parallel for many nodes in a specified cluster.)",
+            .examples{{"icebergHDFSCluster", "SELECT * FROM icebergHDFSCluster(url)", ""}},
+            .categories{"DataLake"}},
+         .allow_readonly = false});
 #endif
 }
+
+void registerTableFunctionDeltaLakeCluster([[maybe_unused]] TableFunctionFactory & factory)
+{
+    UNUSED(factory);
+
+#if USE_AWS_S3 && USE_PARQUET
+    factory.registerFunction<TableFunctionDeltaLakeCluster>(
+        {.documentation
+         = {.description = R"(The table function can be used to read the DeltaLake table stored on object store in parallel for many nodes in a specified cluster.)",
+            .examples{{"deltaLakeCluster", "SELECT * FROM deltaLakeCluster(url, access_key_id, secret_access_key)", ""}},
+            .categories{"DataLake"}},
+         .allow_readonly = false});
+#endif
+}
+
+void registerTableFunctionHudiCluster([[maybe_unused]] TableFunctionFactory & factory)
+{
+    UNUSED(factory);
+
+#if USE_AWS_S3
+    factory.registerFunction<TableFunctionHudiCluster>(
+        {.documentation
+         = {.description = R"(The table function can be used to read the Hudi table stored on object store in parallel for many nodes in a specified cluster.)",
+            .examples{{"hudiCluster", "SELECT * FROM hudiCluster(url, access_key_id, secret_access_key)", ""}},
+            .categories{"DataLake"}},
+         .allow_readonly = false});
+#endif
+}
+
+void registerDataLakeClusterTableFunctions(TableFunctionFactory & factory)
+{
+    registerTableFunctionIcebergCluster(factory);
+    registerTableFunctionHudiCluster(factory);
+    registerTableFunctionDeltaLakeCluster(factory);
+}
+
+}
diff --git a/src/TableFunctions/TableFunctionObjectStorageCluster.h b/src/TableFunctions/TableFunctionObjectStorageCluster.h
index 11e6c1fde82..2e031b45684 100644
--- a/src/TableFunctions/TableFunctionObjectStorageCluster.h
+++ b/src/TableFunctions/TableFunctionObjectStorageCluster.h
@@ -33,6 +33,42 @@ struct HDFSClusterDefinition
     static constexpr auto storage_type_name = "HDFSCluster";
 };
 
+struct IcebergClusterDefinition
+{
+    static constexpr auto name = "icebergCluster";
+    static constexpr auto storage_type_name = "IcebergS3Cluster";
+};
+
+struct IcebergS3ClusterDefinition
+{
+    static constexpr auto name = "icebergS3Cluster";
+    static constexpr auto storage_type_name = "IcebergS3Cluster";
+};
+
+struct IcebergAzureClusterDefinition
+{
+    static constexpr auto name = "icebergAzureCluster";
+    static constexpr auto storage_type_name = "IcebergAzureCluster";
+};
+
+struct IcebergHDFSClusterDefinition
+{
+    static constexpr auto name = "icebergHDFSCluster";
+    static constexpr auto storage_type_name = "IcebergHDFSCluster";
+};
+
+struct DeltaLakeClusterDefinition
+{
+    static constexpr auto name = "deltaLakeCluster";
+    static constexpr auto storage_type_name = "DeltaLakeS3Cluster";
+};
+
+struct HudiClusterDefinition
+{
+    static constexpr auto name = "hudiCluster";
+    static constexpr auto storage_type_name = "HudiS3Cluster";
+};
+
 /**
  * Class implementing s3/hdfs/azureBlobStorageCluster(...) table functions,
  * which allow to process many files from S3/HDFS/Azure blob storage on a specific cluster.
@@ -79,4 +115,26 @@ using TableFunctionAzureBlobCluster = TableFunctionObjectStorageCluster<AzureClusterDefinition, StorageAzureConfiguration>;
 #endif
+
+#if USE_AVRO && USE_AWS_S3
+using TableFunctionIcebergCluster = TableFunctionObjectStorageCluster<IcebergClusterDefinition, StorageS3IcebergConfiguration>;
+using TableFunctionIcebergS3Cluster = TableFunctionObjectStorageCluster<IcebergS3ClusterDefinition, StorageS3IcebergConfiguration>;
+#endif
+
+#if USE_AVRO && USE_AZURE_BLOB_STORAGE
+using TableFunctionIcebergAzureCluster = TableFunctionObjectStorageCluster<IcebergAzureClusterDefinition, StorageAzureIcebergConfiguration>;
+#endif
+
+#if USE_AVRO && USE_HDFS
+using TableFunctionIcebergHDFSCluster = TableFunctionObjectStorageCluster<IcebergHDFSClusterDefinition, StorageHDFSIcebergConfiguration>;
+#endif
+
+#if USE_AWS_S3 && USE_PARQUET
+using TableFunctionDeltaLakeCluster = TableFunctionObjectStorageCluster<DeltaLakeClusterDefinition, StorageS3DeltaLakeConfiguration>;
+#endif
+
+#if USE_AWS_S3
+using TableFunctionHudiCluster = TableFunctionObjectStorageCluster<HudiClusterDefinition, StorageS3HudiConfiguration>;
+#endif
+
 }
diff --git a/src/TableFunctions/registerTableFunctions.cpp b/src/TableFunctions/registerTableFunctions.cpp
index fbe2c7c59ed..131ca783f73 100644
--- a/src/TableFunctions/registerTableFunctions.cpp
+++ b/src/TableFunctions/registerTableFunctions.cpp
@@ -66,6 +66,7 @@ void registerTableFunctions(bool use_legacy_mongodb_integration [[maybe_unused]]
     registerTableFunctionObjectStorage(factory);
     registerTableFunctionObjectStorageCluster(factory);
     registerDataLakeTableFunctions(factory);
+    registerDataLakeClusterTableFunctions(factory);
 }
 
 }
diff --git a/src/TableFunctions/registerTableFunctions.h b/src/TableFunctions/registerTableFunctions.h
index e22ba7346fa..1168a8ef739 100644
--- a/src/TableFunctions/registerTableFunctions.h
+++ b/src/TableFunctions/registerTableFunctions.h
@@ -70,6 +70,7 @@ void registerTableFunctionExplain(TableFunctionFactory & factory);
 void registerTableFunctionObjectStorage(TableFunctionFactory & factory);
 void registerTableFunctionObjectStorageCluster(TableFunctionFactory & factory);
 void registerDataLakeTableFunctions(TableFunctionFactory & factory);
+void registerDataLakeClusterTableFunctions(TableFunctionFactory & factory);
 void registerTableFunctionTimeSeries(TableFunctionFactory & factory);